diff --git a/.gitignore b/.gitignore
index c2ed4ec..0544d50 100644
--- a/.gitignore
+++ b/.gitignore
@@ -114,3 +114,7 @@ all.config
 
 # Kdevelop4
 *.kdev4
+
+# dtb objects
+*.dtb
+*.dtbo
diff --git b/Documentation/ABI/testing/sysfs-devices-platform-bone_capemgr b/Documentation/ABI/testing/sysfs-devices-platform-bone_capemgr
new file mode 100644
index 0000000..e2df613
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-devices-platform-bone_capemgr
@@ -0,0 +1,63 @@
+What:		/sys/devices/platform/bone_capemgr/slots
+Date:		May 2015
+KernelVersion:	4.0
+Contact:	Pantelis Antoniou <pantelis.antoniou@konsulko.com>
+Description:
+		READ:
+		  Describe the state of all the slots of the beaglebone capemgr.
+		  Each line of the output describes a slot:
+		  The slot format is as following:
+		  <slot-id>: [P-][F-][O-][l-][L-][D-] \
+			  <overlay-id> <board-name>,<version>,
+			  <manufacturer>,<part-number>
+
+		Where the flags are:
+		P: Slot has been probed
+		F: Slot has failed probing (i.e. no EEPROM detected)
+		O: Slot has been overridden by the user
+		l: Slot is current loading
+		L: Slot has completed loading and is ready
+		D: Slot has been disabled
+
+		Example:
+		0: P---L-  -1 BeagleBone RS232 CAPE,00A1,Beagleboardtoys,BB-BONE-SERL-03
+		1: PF----  -1
+		2: PF----  -1
+		3: PF----  -1
+
+		WRITE:
+		  Writing a string of the form <part-number>[:version] issues a request to
+		  load a firmware blob containing an overlay. The name of the firmware blob
+		  is <part-number>-[version|00A0].dtbo. This act is defined as a slot override.
+
+		  Writing a negative slot id removes the slot if it was an overridden one, or
+		  unloads a slot that was probed.
+
+What:		/sys/devices/platform/bone_capemgr/baseboard/<eeprom-field>
+Date:		May 2015
+KernelVersion:	4.0
+Contact:	Pantelis Antoniou <pantelis.antoniou@konsulko.com>
+Description:	Contains the probed base board EEPROM field; one of:
+		board-name		- board-name as stored in cape EEPROM
+		dc-supplied		- whether the cape draws or supplies DC
+		eeprom-format-revision	- EEPROM format rev, only 00A0 supported
+		header			- header; should be 'aa 55 33 ee'
+		manufacturer		- manufacturer string
+		part-number		- part-number of the cape
+		serial-number		- serial number of the cape
+		version			- version of the cape, i.e. 00A0
+		number-of-pins		- displayed but ignored
+		pin-usage		- displayed but ignored
+		sys-5v			- displayed but ignored
+		vdd-3v3exp		- displayed but ignored
+		vdd-5v			- displayed but ignored
+What:		/sys/devices/platform/bone_capemgr/slot-<n>/<eeprom-field>
+Date:		May 2015
+KernelVersion:	4.0
+Contact:	Pantelis Antoniou <pantelis.antoniou@konsulko.com>
+Description:	Contains the probed cape's EEPROM field; the field is one of:
+		board-name		- baseboard name i.e. A335BNLT
+		header			- header; should be 'aa 55 33 ee'
+		revision		- baseboard revision
+		serial-number		- baseboard serial number
+		config-option		- displayed but ignored
diff --git b/Documentation/ABI/testing/sysfs-firmware-devicetree-overlays b/Documentation/ABI/testing/sysfs-firmware-devicetree-overlays
new file mode 100644
index 0000000..88d1549
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-firmware-devicetree-overlays
@@ -0,0 +1,52 @@
+What:		/sys/firmware/devicetree/overlays/
+Date:		October 2015
+Contact:	Pantelis Antoniou <pantelis.antoniou@konsulko.com>
+Description:
+		This directory contains the applied device tree overlays of
+		the running system, as directories of the overlay id.
+
+What:		/sys/firmware/devicetree/overlays/enable
+Date:		October 2015
+Contact:	Pantelis Antoniou <pantelis.antoniou@konsulko.com>
+Description:
+		The master enable switch, by default is 1, and when
+		set to 0 it cannot be re-enabled for security reasons.
+
+		The discussion about this switch takes place in:
+		http://comments.gmane.org/gmane.linux.drivers.devicetree/101871
+
+		Kees Cook:
+		"Coming from the perspective of drawing a bright line between
+		kernel and the root user (which tends to start with disabling
+		kernel module loading), I would say that there at least needs
+		to be a high-level one-way "off" switch for the interface so
+		that systems that have this interface can choose to turn it off
+		during initial boot, etc."
+
+What:		/sys/firmware/devicetree/overlays/<id>
+Date:		October 2015
+Contact:	Pantelis Antoniou <pantelis.antoniou@konsulko.com>
+Description:
+		Each directory represents an applied overlay, containing
+		the following attribute files.
+
+What:		/sys/firmware/devicetree/overlays/<id>/can_remove
+Date:		October 2015
+Contact:	Pantelis Antoniou <pantelis.antoniou@konsulko.com>
+Description:
+		The attribute set to 1 means that the overlay can be removed,
+		while 0 means that the overlay is being overlapped therefore
+		removal is prohibited.
+
+What:		/sys/firmware/devicetree/overlays/<id>/<fragment-name>/
+Date:		October 2015
+Contact:	Pantelis Antoniou <pantelis.antoniou@konsulko.com>
+Description:
+		Each of these directories contain information about of the
+		particular overlay fragment.
+
+What:		/sys/firmware/devicetree/overlays/<id>/<fragment-name>/target
+Date:		October 2015
+Contact:	Pantelis Antoniou <pantelis.antoniou@konsulko.com>
+Description:
+		The full-path of the target of the fragment
diff --git a/Documentation/devicetree/bindings/arm/omap/omap.txt b/Documentation/devicetree/bindings/arm/omap/omap.txt
index f53e2ee..29bc4fe 100644
--- a/Documentation/devicetree/bindings/arm/omap/omap.txt
+++ b/Documentation/devicetree/bindings/arm/omap/omap.txt
@@ -24,6 +24,8 @@ Optional properties:
 - ti,no-reset-on-init: When present, the module should not be reset at init
 - ti,no-idle-on-init: When present, the module should not be idled at init
 - ti,no-idle: When present, the module is never allowed to idle.
+- ti,deassert-hard-reset: list of hwmod and hardware reset line name pairs
+  (ascii strings) to be deasserted upon device instantiation.
 
 Example:
 
diff --git b/Documentation/devicetree/bindings/cpufreq/ti-cpufreq.txt b/Documentation/devicetree/bindings/cpufreq/ti-cpufreq.txt
new file mode 100644
index 0000000..7a31864
--- /dev/null
+++ b/Documentation/devicetree/bindings/cpufreq/ti-cpufreq.txt
@@ -0,0 +1,132 @@
+TI CPUFreq and OPP bindings
+================================
+
+Certain TI SoCs, like those in the am335x, am437x, am57xx, and dra7xx
+families support different OPPs depending on the silicon variant in use.
+The ti_cpufreq driver can use revision and an efuse value from the SoC to
+provide the OPP framework with supported hardware information. This is
+used to determine which OPPs from the operating-points-v2 table get enabled
+when it is parsed by the OPP framework.
+
+Required properties:
+--------------------
+In 'cpus' nodes:
+- operating-points-v2: Phandle to the operating-points-v2 table to use.
+
+In 'operating-points-v2' table:
+- compatible: Should be
+	- 'operating-points-v2-ti' for am335x, am43xx, and dra7xx/am57xx SoCs
+- ti,syscon-efuse: Syscon phandle, offset to efuse register, efuse register
+		   mask, and efuse register shift to get the relevant bits
+		   that describe OPP availability.
+- ti,syscon-rev: Syscon and offset used to look up revision value on SoC.
+
+Optional properties:
+--------------------
+For each opp entry in 'operating-points-v2' table:
+- opp-supported-hw: Two bitfields indicating:
+	1. Which revision of the SoC the OPP is supported by
+	2. Which eFuse bits indicate this OPP is available
+
+	A bitwise AND is performed against these values and if any bit
+	matches, the OPP gets enabled. Not providing the property for an
+	entry indicates that an OPP is always supported.
+
+Example:
+--------
+
+/* From arch/arm/boot/dts/am33xx.dtsi */
+cpus {
+	#address-cells = <1>;
+	#size-cells = <0>;
+	cpu@0 {
+		compatible = "arm,cortex-a8";
+		device_type = "cpu";
+		reg = <0>;
+
+		operating-points-v2 = <&cpu0_opp_table>;
+
+		clocks = <&dpll_mpu_ck>;
+		clock-names = "cpu";
+
+		clock-latency = <300000>; /* From omap-cpufreq driver */
+	};
+};
+
+/*
+ * cpu0 has different OPPs depending on SoC revision and some on revisions
+ * 0x2 and 0x4 have eFuse bits that indicate if they are available or not
+ */
+cpu0_opp_table: opp_table0 {
+	compatible = "operating-points-v2-ti-am3352-cpu";
+	ti,syscon-efuse = <&scm_conf 0x7fc 0x1fff 0>;
+	ti,syscon-rev = <&scm_conf 0x600>;
+
+	/*
+	 * The three following nodes are marked with opp-suspend
+	 * because they can not be enabled simultaneously on a
+	 * single SoC.
+	 */
+	opp50@300000000 {
+		opp-hz = /bits/ 64 <300000000>;
+		opp-microvolt = <950000 931000 969000>;
+		opp-supported-hw = <0x06 0x0010>;
+		opp-suspend;
+	};
+
+	opp100@275000000 {
+		opp-hz = /bits/ 64 <275000000>;
+		opp-microvolt = <1100000 1078000 1122000>;
+		opp-supported-hw = <0x01 0x00FF>;
+		opp-suspend;
+	};
+
+	opp100@300000000 {
+		opp-hz = /bits/ 64 <300000000>;
+		opp-microvolt = <1100000 1078000 1122000>;
+		opp-supported-hw = <0x06 0x0020>;
+		opp-suspend;
+	};
+
+	opp100@500000000 {
+		opp-hz = /bits/ 64 <500000000>;
+		opp-microvolt = <1100000 1078000 1122000>;
+		opp-supported-hw = <0x01 0xFFFF>;
+	};
+
+	opp100@600000000 {
+		opp-hz = /bits/ 64 <600000000>;
+		opp-microvolt = <1100000 1078000 1122000>;
+		opp-supported-hw = <0x06 0x0040>;
+	};
+
+	opp120@600000000 {
+		opp-hz = /bits/ 64 <600000000>;
+		opp-microvolt = <1200000 1176000 1224000>;
+		opp-supported-hw = <0x01 0xFFFF>;
+	};
+
+	opp120@720000000 {
+		opp-hz = /bits/ 64 <720000000>;
+		opp-microvolt = <1200000 1176000 1224000>;
+		opp-supported-hw = <0x06 0x0080>;
+	};
+
+	oppturbo@720000000 {
+		opp-hz = /bits/ 64 <720000000>;
+		opp-microvolt = <1260000 1234800 1285200>;
+		opp-supported-hw = <0x01 0xFFFF>;
+	};
+
+	oppturbo@800000000 {
+		opp-hz = /bits/ 64 <800000000>;
+		opp-microvolt = <1260000 1234800 1285200>;
+		opp-supported-hw = <0x06 0x0100>;
+	};
+
+	oppnitro@1000000000 {
+		opp-hz = /bits/ 64 <1000000000>;
+		opp-microvolt = <1325000 1298500 1351500>;
+		opp-supported-hw = <0x04 0x0200>;
+	};
+};
diff --git b/Documentation/devicetree/bindings/misc/bone_capemgr.txt b/Documentation/devicetree/bindings/misc/bone_capemgr.txt
new file mode 100644
index 0000000..7e4fbc9
--- /dev/null
+++ b/Documentation/devicetree/bindings/misc/bone_capemgr.txt
@@ -0,0 +1,111 @@
+* Beaglebone cape manager driver
+
+Required properties:
+- compatible: "ti,bone-capemgr"
+- eeprom: phandle to the EEPROM baseboard.
+          The EEPROM framework interface is use to obtain the data.
+
+Required children nodes:
+
+- baseboardmaps: Contains nodes, which each of the them defines a mapping from
+		 the baseboard EEPROM board-name ID to a DT friendly compatible
+		 string.
+
+  - board-name:      The baseboard EEPROM board name, i.e. A335BONE for the
+                     original beaglebone white.
+  - compatible-name: The DT friendly compatible string to be used for matching
+		     compatible capes, i.e. "ti,beaglebone"
+
+
+ - nvmem-cells: Defines the phandles of the nvmem cells of the baseboard and the
+                slots.
+ - nvmem-cells: Defines the names of the nvmem cells. Required to have at
+                least a baseboard cell name.
+
+ - #slots:	Defines how many slots are there.
+
+- Example of a beaglebone cape-manager:
+
+bone_capemgr {
+	compatible = "ti,bone-capemgr";
+	status = "okay";
+
+	nvmem-cell = <&baseboard_data
+		      &cape0_data &cape1_data &cape2_data &cape3_data>;
+	nvmem-cell-names = "baseboard", "slot0", "slot1", "slot2", "slot3";
+
+	#slots = <4>;
+
+	/* map board revisions to compatible definitions */
+	baseboardmaps {
+		baseboard_beaglebone: board@0 {
+			board-name = "A335BONE";
+			compatible-name = "ti,beaglebone";
+		};
+
+		baseboard_beaglebone_black: board@1 {
+			board-name = "A335BNLT";
+			compatible-name = "ti,beaglebone-black";
+		};
+	};
+};
+
+The format of the cape to be loaded is in a standard overlay format with
+the following root properties that are interpreted by the cape manager:
+
+Required properties:
+ - compatible: Should be compatible to the baseboard according to the
+               baseboard map value, i.e. "ti,beaglebone".
+ - part-numer: Should contain the part-number as stored in the EEPROM.
+ - version:    Should contain a list of all the version that are supported
+               by the single cape dtbo, i.e. "00A1".
+
+Optional properties:
+ - exclusive-use: A string list which state the resources this cape requires.
+                  No processing or matching to anything regarding the internal
+		  kernel state is performed; it's purpose is to guard against
+		  conflicts with other capes.
+ - priority:      A priority to be assigned when loading a cape. A lower value
+                  has higher priority. The purpose of the priority is to control
+		  which cape is loaded first in case of a conflict.
+
+- Example of a serial cape:
+
+/dts-v1/;
+/plugin/;
+/ {
+        compatible = "ti,beaglebone", "ti,beaglebone-black";
+
+        /* identification */
+        part-number = "BB-BONE-SERL-03";
+        version = "00A1";
+
+        /* state the resources this cape uses */
+        exclusive-use =
+                /* the pin header uses */
+                "P9.21",        /* uart2_txd */
+                "P9.22",        /* uart2_rxd */
+                /* the hardware ip uses */
+                "uart2";
+
+        fragment@0 {
+                target = <&am33xx_pinmux>;
+                __overlay__ {
+                        bb_uart2_pins: pinmux_bb_uart2_pins {
+                                pinctrl-single,pins = <
+                                        0x150 0x21      /* spi0_sclk.uart2_rxd | MODE1 */
+                                        0x154 0x01      /* spi0_d0.uart2_txd | MODE1 */
+                                >;
+                        };
+                };
+        };
+
+        fragment@1 {
+                target = <&uart2>;
+                __overlay__ {
+                        status = "okay";
+                        pinctrl-names = "default";
+                        pinctrl-0 = <&bb_uart2_pins>;
+                };
+        };
+};
diff --git b/Documentation/devicetree/bindings/net/allwinner,sun8i-emac.txt b/Documentation/devicetree/bindings/net/allwinner,sun8i-emac.txt
new file mode 100644
index 0000000..4bf4e53
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/allwinner,sun8i-emac.txt
@@ -0,0 +1,65 @@
+* Allwinner sun8i EMAC ethernet controller
+
+Required properties:
+- compatible: "allwinner,sun8i-a83t-emac", "allwinner,sun8i-h3-emac",
+		or "allwinner,sun50i-a64-emac"
+- reg: address and length of the register sets for the device.
+- reg-names: should be "emac" and "syscon", matching the register sets
+- interrupts: interrupt for the device
+- clocks: A phandle to the reference clock for this device
+- clock-names: should be "ahb"
+- resets: A phandle to the reset control for this device
+- reset-names: should be "ahb"
+- phy-mode: See ethernet.txt
+- phy or phy-handle: See ethernet.txt
+- #address-cells: shall be 1
+- #size-cells: shall be 0
+
+"allwinner,sun8i-h3-emac" also requires:
+- clocks: an extra phandle to the reference clock for the EPHY
+- clock-names: an extra "ephy" entry matching the clocks property
+- resets: an extra phandle to the reset control for the EPHY
+- resets-names: an extra "ephy" entry matching the resets property
+
+See ethernet.txt in the same directory for generic bindings for ethernet
+controllers.
+
+The device node referenced by "phy" or "phy-handle" should be a child node
+of this node. See phy.txt for the generic PHY bindings.
+
+Optional properties:
+- phy-supply: phandle to a regulator if the PHY needs one
+- phy-io-supply: phandle to a regulator if the PHY needs a another one for I/O.
+		 This is sometimes found with RGMII PHYs, which use a second
+		 regulator for the lower I/O voltage.
+- allwinner,tx-delay: The setting of the TX clock delay chain
+- allwinner,rx-delay: The setting of the RX clock delay chain
+
+The TX/RX clock delay chain settings are board specific.
+
+Optional properties for "allwinner,sun8i-h3-emac":
+- allwinner,use-internal-phy: Use the H3 SoC's internal E(thernet) PHY
+- allwinner,leds-active-low: EPHY LEDs are active low
+
+When the internal PHY is requested, the implementation shall configure the
+internal PHY to use the address specified in the child PHY node.
+
+Example:
+
+emac: ethernet@01c0b000 {
+	compatible = "allwinner,sun8i-h3-emac";
+	reg = <0x01c0b000 0x104>, <0x01c00030 0x4>;
+	reg-names = "emac", "syscon";
+	interrupts = <GIC_SPI 82 IRQ_TYPE_LEVEL_HIGH>;
+	clocks = <&bus_gates 17>, <&bus_gates 128>;
+	clock-names = "ahb", "ephy";
+	resets = <&ahb_rst 17>, <&ahb_rst 66>;
+	reset-names = "ahb", "ephy";
+	phy = <&phy1>;
+	allwinner,use-internal-phy;
+	allwinner,leds-active-low;
+
+	phy1: ethernet-phy@1 {
+		reg = <1>;
+	};
+};
diff --git a/Documentation/devicetree/bindings/opp/opp.txt b/Documentation/devicetree/bindings/opp/opp.txt
index ee91cbd..9f5ca44 100644
--- a/Documentation/devicetree/bindings/opp/opp.txt
+++ b/Documentation/devicetree/bindings/opp/opp.txt
@@ -86,8 +86,14 @@ Optional properties:
   Single entry is for target voltage and three entries are for <target min max>
   voltages.
 
-  Entries for multiple regulators must be present in the same order as
-  regulators are specified in device's DT node.
+  Entries for multiple regulators shall be provided in the same field separated
+  by angular brackets <>. The OPP binding doesn't provide any provisions to
+  relate the values to their power supplies or the order in which the supplies
+  need to be configured and that is left for the implementation specific
+  binding.
+
+  Entries for all regulators shall be of the same size, i.e. either all use a
+  single value or triplets.
 
 - opp-microvolt-<name>: Named opp-microvolt property. This is exactly similar to
   the above opp-microvolt property, but allows multiple voltage ranges to be
@@ -104,10 +110,13 @@ Optional properties:
 
   Should only be set if opp-microvolt is set for the OPP.
 
-  Entries for multiple regulators must be present in the same order as
-  regulators are specified in device's DT node. If this property isn't required
-  for few regulators, then this should be marked as zero for them. If it isn't
-  required for any regulator, then this property need not be present.
+  Entries for multiple regulators shall be provided in the same field separated
+  by angular brackets <>. If current values aren't required for a regulator,
+  then it shall be filled with 0. If current values aren't required for any of
+  the regulators, then this field is not required. The OPP binding doesn't
+  provide any provisions to relate the values to their power supplies or the
+  order in which the supplies need to be configured and that is left for the
+  implementation specific binding.
 
 - opp-microamp-<name>: Named opp-microamp property. Similar to
   opp-microvolt-<name> property, but for microamp instead.
@@ -386,10 +395,12 @@ Example 4: Handling multiple regulators
 / {
 	cpus {
 		cpu@0 {
-			compatible = "arm,cortex-a7";
+			compatible = "vendor,cpu-type";
 			...
 
-			cpu-supply = <&cpu_supply0>, <&cpu_supply1>, <&cpu_supply2>;
+			vcc0-supply = <&cpu_supply0>;
+			vcc1-supply = <&cpu_supply1>;
+			vcc2-supply = <&cpu_supply2>;
 			operating-points-v2 = <&cpu0_opp_table>;
 		};
 	};
diff --git b/Documentation/devicetree/bindings/pinctrl/ti,iodelay-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/ti,iodelay-pinctrl.txt
new file mode 100644
index 0000000..e12f4e5
--- /dev/null
+++ b/Documentation/devicetree/bindings/pinctrl/ti,iodelay-pinctrl.txt
@@ -0,0 +1,86 @@
+Texas Instruments I/O Delay module configuration pinctrl definition
+
+Used in conjunction with Documentation/devicetree/bindings/pinctrl/ti,omap-pinctrl.txt
+
+Required Properties:
+- compatible: Should be:
+  "ti,dra7-iodelay" - I/O delay configuration for DRA7
+- reg	- must be the register address range of IODelay module
+- #address-cells = <1>;
+- #size-cells = <0>;
+
+Important note: Use of "ti,dra7-iodelay" compatible definition need to be
+carefully evaluated due to the expectation of glitch during configuration.
+
+Example:
+
+dra7_iodelay_core: padconf@4844a000 {
+	compatible = "ti,dra7-iodelay";
+	reg = <0x4844a000 0x0d1c>;
+	#address-cells = <1>;
+	#size-cells = <0>;
+};
+
+Configuration definition follows similar model as the pinctrl-single:
+The groups of pin configuration are defined under "pinctrl-single,pins"
+
+&dra7_iodelay_core {
+	mmc2_iodelay_3v3_conf: mmc2_iodelay_3v3_conf {
+		pinctrl-single,pins = <
+			0x18c (A_DELAY(0) | G_DELAY(120))	/* CFG_GPMC_A19_IN */
+			0x1a4 (A_DELAY(265) | G_DELAY(360))	/* CFG_GPMC_A20_IN */
+			0x1b0 (A_DELAY(0) | G_DELAY(120))	/* CFG_GPMC_A21_IN */
+			0x1bc (A_DELAY(0) | G_DELAY(120))	/* CFG_GPMC_A22_IN */
+			0x1c8 (A_DELAY(287) | G_DELAY(420))	/* CFG_GPMC_A23_IN */
+			0x1d4 (A_DELAY(144) | G_DELAY(240))	/* CFG_GPMC_A24_IN */
+			0x1e0 (A_DELAY(0) | G_DELAY(0))		/* CFG_GPMC_A25_IN */
+			0x1ec (A_DELAY(120) | G_DELAY(0))	/* CFG_GPMC_A26_IN */
+			0x1f8 (A_DELAY(120) | G_DELAY(180))	/* CFG_GPMC_A27_IN */
+			0x360 (A_DELAY(0) | G_DELAY(0))		/* CFG_GPMC_CS1_IN */
+		>;
+	};
+};
+
+Usage in conjunction with pinctrl single:
+
+For a complete description of the pins both the regular muxing as well as the
+iodelay configuration is necessary. For example:
+
+&dra7_pmx_core {
+	mmc2_pins_default: mmc2_pins_default {
+		pinctrl-single,pins = <
+			0x9c (PIN_INPUT_PULLUP | MANUAL_MODE | MUX_MODE1) /* gpmc_a23.mmc2_clk */
+			0xb0 (PIN_INPUT_PULLUP | MANUAL_MODE | MUX_MODE1) /* gpmc_cs1.mmc2_cmd */
+			0xa0 (PIN_INPUT_PULLUP | MANUAL_MODE | MUX_MODE1) /* gpmc_a24.mmc2_dat0 */
+			0xa4 (PIN_INPUT_PULLUP | MANUAL_MODE | MUX_MODE1) /* gpmc_a25.mmc2_dat1 */
+			0xa8 (PIN_INPUT_PULLUP | MANUAL_MODE | MUX_MODE1) /* gpmc_a26.mmc2_dat2 */
+			0xac (PIN_INPUT_PULLUP | MANUAL_MODE | MUX_MODE1) /* gpmc_a27.mmc2_dat3 */
+			0x8c (PIN_INPUT_PULLUP | MANUAL_MODE | MUX_MODE1) /* gpmc_a19.mmc2_dat4 */
+			0x90 (PIN_INPUT_PULLUP | MANUAL_MODE | MUX_MODE1) /* gpmc_a20.mmc2_dat5 */
+			0x94 (PIN_INPUT_PULLUP | MANUAL_MODE | MUX_MODE1) /* gpmc_a21.mmc2_dat6 */
+			0x98 (PIN_INPUT_PULLUP | MANUAL_MODE | MUX_MODE1) /* gpmc_a22.mmc2_dat7 */
+		>;
+	};
+};
+
+&dra7_iodelay_core {
+	mmc2_iodelay_3v3_conf: mmc2_iodelay_3v3_conf {
+		pinctrl-single,pins = <
+			0x18c (A_DELAY(0) | G_DELAY(120))	/* CFG_GPMC_A19_IN */
+			0x1a4 (A_DELAY(265) | G_DELAY(360))	/* CFG_GPMC_A20_IN */
+			0x1b0 (A_DELAY(0) | G_DELAY(120))	/* CFG_GPMC_A21_IN */
+			0x1bc (A_DELAY(0) | G_DELAY(120))	/* CFG_GPMC_A22_IN */
+			0x1c8 (A_DELAY(287) | G_DELAY(420))	/* CFG_GPMC_A23_IN */
+			0x1d4 (A_DELAY(144) | G_DELAY(240))	/* CFG_GPMC_A24_IN */
+			0x1e0 (A_DELAY(0) | G_DELAY(0))		/* CFG_GPMC_A25_IN */
+			0x1ec (A_DELAY(120) | G_DELAY(0))	/* CFG_GPMC_A26_IN */
+			0x1f8 (A_DELAY(120) | G_DELAY(180))	/* CFG_GPMC_A27_IN */
+			0x360 (A_DELAY(0) | G_DELAY(0))		/* CFG_GPMC_CS1_IN */
+		>;
+	};
+};
+
+&mmc2 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&mmc2_pins_default &mmc2_iodelay_3v3_conf>;
+};
diff --git b/Documentation/devicetree/bindings/power/pwrseq/pwrseq-generic.txt b/Documentation/devicetree/bindings/power/pwrseq/pwrseq-generic.txt
new file mode 100644
index 0000000..ebf0d47
--- /dev/null
+++ b/Documentation/devicetree/bindings/power/pwrseq/pwrseq-generic.txt
@@ -0,0 +1,48 @@
+The generic power sequence library
+
+Some hard-wired devices (eg USB/MMC) need to do power sequence before
+the device can be enumerated on the bus, the typical power sequence
+like: enable USB PHY clock, toggle reset pin, etc. But current
+Linux device driver lacks of such code to do it, it may cause some
+hard-wired devices works abnormal or can't be recognized by
+controller at all. The power sequence will be done before this device
+can be found at the bus.
+
+The power sequence properties is under the device node.
+
+Optional properties:
+- clocks: the input clocks for device.
+- reset-gpios: Should specify the GPIO for reset.
+- reset-duration-us: the duration in microsecond for assert reset signal.
+
+Below is the example of USB power sequence properties on USB device
+nodes which have two level USB hubs.
+
+&usbotg1 {
+	vbus-supply = <&reg_usb_otg1_vbus>;
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_usb_otg1_id>;
+	status = "okay";
+
+	#address-cells = <1>;
+	#size-cells = <0>;
+	genesys: hub@1 {
+		compatible = "usb5e3,608";
+		reg = <1>;
+
+		clocks = <&clks IMX6SX_CLK_CKO>;
+		reset-gpios = <&gpio4 5 GPIO_ACTIVE_LOW>; /* hub reset pin */
+		reset-duration-us = <10>;
+
+		#address-cells = <1>;
+		#size-cells = <0>;
+		asix: ethernet@1 {
+			compatible = "usbb95,1708";
+			reg = <1>;
+
+			clocks = <&clks IMX6SX_CLK_IPG>;
+			reset-gpios = <&gpio4 6 GPIO_ACTIVE_LOW>; /* ethernet_rst */
+			reset-duration-us = <15>;
+		};
+	};
+};
diff --git a/Documentation/devicetree/bindings/usb/usb-device.txt b/Documentation/devicetree/bindings/usb/usb-device.txt
index 1c35e7b..3661dd2 100644
--- a/Documentation/devicetree/bindings/usb/usb-device.txt
+++ b/Documentation/devicetree/bindings/usb/usb-device.txt
@@ -13,6 +13,10 @@ Required properties:
 - reg: the port number which this device is connecting to, the range
   is 1-31.
 
+Optional properties:
+power sequence properties, see
+Documentation/devicetree/bindings/power/pwrseq/pwrseq-generic.txt for detail
+
 Example:
 
 &usb1 {
@@ -21,8 +25,12 @@ Example:
 	#address-cells = <1>;
 	#size-cells = <0>;
 
-	hub: genesys@1 {
+	genesys: hub@1 {
 		compatible = "usb5e3,608";
 		reg = <1>;
+
+		clocks = <&clks IMX6SX_CLK_CKO>;
+		reset-gpios = <&gpio4 5 GPIO_ACTIVE_LOW>; /* hub reset pin */
+		reset-duration-us = <10>;
 	};
 }
diff --git b/Documentation/devicetree/configfs-overlays.txt b/Documentation/devicetree/configfs-overlays.txt
new file mode 100644
index 0000000..5fa43e0
--- /dev/null
+++ b/Documentation/devicetree/configfs-overlays.txt
@@ -0,0 +1,31 @@
+Howto use the configfs overlay interface.
+
+A device-tree configfs entry is created in /config/device-tree/overlays
+and and it is manipulated using standard file system I/O.
+Note that this is a debug level interface, for use by developers and
+not necessarily something accessed by normal users due to the
+security implications of having direct access to the kernel's device tree.
+
+* To create an overlay you mkdir the directory:
+
+	# mkdir /config/device-tree/overlays/foo
+
+* Either you echo the overlay firmware file to the path property file.
+
+	# echo foo.dtbo >/config/device-tree/overlays/foo/path
+
+* Or you cat the contents of the overlay to the dtbo file
+
+	# cat foo.dtbo >/config/device-tree/overlays/foo/dtbo
+
+The overlay file will be applied, and devices will be created/destroyed
+as required.
+
+To remove it simply rmdir the directory.
+
+	# rmdir /config/device-tree/overlays/foo
+
+The rationalle of the dual interface (firmware & direct copy) is that each is
+better suited to different use patterns. The firmware interface is what's
+intended to be used by hardware managers in the kernel, while the copy interface
+make sense for developers (since it avoids problems with namespaces).
diff --git a/Documentation/devicetree/overlay-notes.txt b/Documentation/devicetree/overlay-notes.txt
index d418a6c..3e8df30 100644
--- a/Documentation/devicetree/overlay-notes.txt
+++ b/Documentation/devicetree/overlay-notes.txt
@@ -100,6 +100,14 @@ Finally, if you need to remove all overlays in one-go, just call
 of_overlay_destroy_all() which will remove every single one in the correct
 order.
 
+If your board has multiple slots/places where a single overlay can work
+and each slot is defined by a node, you can use the
+of_overlay_create_target_index() method to select the target.
+
+For overlays on probeable busses, use the of_overlay_create_target_root() method
+in which you supply a device node as a target root, and which all target
+references in the overlay are performed relative to that node.
+
 Overlay DTS Format
 ------------------
 
@@ -110,9 +118,11 @@ The DTS of an overlay should have the following format:
 
 	fragment@0 {	/* first child node */
 
-		target=<phandle>;	/* phandle target of the overlay */
+		/* phandle target of the overlay */
+		target=<phandle> [, <phandle>, ...];
 	or
-		target-path="/path";	/* target path of the overlay */
+		/* target path of the overlay */
+		target-path="/path" [ , "/path", ...];
 
 		__overlay__ {
 			property-a;	/* add property-a to the target */
@@ -131,3 +141,11 @@ Using the non-phandle based target method allows one to use a base DT which does
 not contain a __symbols__ node, i.e. it was not compiled with the -@ option.
 The __symbols__ node is only required for the target=<phandle> method, since it
 contains the information required to map from a phandle to a tree location.
+
+Using a target index requires the use of a selector target on the call to
+of_overlay_create_target_index(). I.e. passing an index of 0 will select the
+target in the foo node, an index of 1 the bar node, etc.
+
+Note that when using the target root create method all target references must
+lie under the target root node. I.e. the overlay is not allowed to 'break' out
+of the root.
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 86a6746..b093a38 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -121,6 +121,7 @@ parameter is applicable:
 	NET	Appropriate network support is enabled.
 	NUMA	NUMA support is enabled.
 	NFS	Appropriate NFS support is enabled.
+	OF	Open Firmware support (device tree) is enabled.
 	OSS	OSS sound support is enabled.
 	PV_OPS	A paravirtualized kernel is enabled.
 	PARIDE	The ParIDE (parallel port IDE) subsystem is enabled.
@@ -2872,6 +2873,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			This can be set from sysctl after boot.
 			See Documentation/sysctl/vm.txt for details.
 
+	of_overlay_disable	[OF] Disable device tree overlays at boot time.
+
 	ohci1394_dma=early	[HW] enable debugging via the ohci1394 driver.
 			See Documentation/debugging-via-ohci1394.txt for more
 			info.
diff --git b/Documentation/misc-devices/bone_capemgr.txt b/Documentation/misc-devices/bone_capemgr.txt
new file mode 100644
index 0000000..2a8c766
--- /dev/null
+++ b/Documentation/misc-devices/bone_capemgr.txt
@@ -0,0 +1,63 @@
+---------------------------
+  Beaglebone Cape-Manager
+---------------------------
+
+The beaglebone cape manager driver allows the automatic use of external
+peripheral capes to be automatically supported by Linux without any manual
+setup required by the user.
+
+Each beaglebone cape should contain an EEPROM that describes
+it in a fixed I2C address on the i2c2 bus of the baseboard.
+The format of the EEPROM is defined in the beaglebone reference
+manual at:
+http://beagleboard.org/static/beaglebone/latest/Docs/Hardware/BONE_SRM.pdf
+
+Reading the part number and revision information the manager
+requests a firmware file formatted as a device tree overlay blob.
+
+Applying the overlay the devices are instantiated and the cape is
+ready to be used.
+
+For instance if the part-number is BB-BONE-SERL-03 and the version is 00A1
+the firmware file requested will be BB-BONE-SERL-03-00A1-00A1.dtbo
+It will be located by the in-kernel firmware
+loader in the usual place, i.e.  /lib/firmware/`uname -r`, /lib/firmware etc.
+
+The driver supports the following parameters (either as part of the kernel
+command line or supplied at module insertion time).
+
+disable_partno:   A comma delimited list of PART-NUMBER[:REV] of
+                  disabled capes.
+enable_partno:    A comma delimited list of PART-NUMBER[:REV[:PRIO]] of
+                  enabled capes.
+boot_scan_period: The boot scan period in ms. When the cape manager is built-in
+                  the kernel image, the firmware loader cannot find the files
+		  before the rootfs is mounted. This parameter controls the
+		  period with which the boot state is checked in that case.
+
+There's a sysfs control interface which is defined at the ABI documentation
+area.
+
+Theory of operation:
+--------------------
+
+On driver probe the I2C EEPROM of the baseboard is read and information about
+the current baseboard is retrieved. This information includes the mapping from
+baseboard board name to DT friendly compatible string. I.e. the "A335BONE" board
+name from EEPROM is mapped to the "ti,beaglebone" compatible string which should
+be present in the dtbo to be loaded.
+
+Afterwards the EEPROMs declared in each slot are probed, and the EEPROMs found
+are decoded keeping track the cape part-number and version data.
+
+Using the part-number and version a firmware file is requested (the firmware
+file requested is <part-number>-<version>.dtbo).
+
+The dtbo is unflattend and the resulting device tree is matched against a
+compatible baseboard, and in case of multiple parallel loading capes the
+priorities defined are honored.  That means that when there are multiple capes
+being loaded in parallel the ones with the lowest priority number are loaded
+first.
+
+Applying the device tree overlay makes the cape operational, as if it was part
+of the kernel's booting device tree.
diff --git a/Documentation/printk-formats.txt b/Documentation/printk-formats.txt
index 5962949..d1ba882 100644
--- a/Documentation/printk-formats.txt
+++ b/Documentation/printk-formats.txt
@@ -324,10 +324,49 @@ Network device features:
 
 	Passed by reference.
 
+Command from struct task_struct
+
+	%pT	ls
+
+	For printing executable name excluding path from struct
+	task_struct.
+
+	Passed by reference.
+
+Device tree nodes:
+
+	%pO[fnpPcCFr]
+
+	For printing device tree nodes. The optional arguments are:
+            f device node full_name
+            n device node name
+            p device node phandle
+            P device node path spec (name + @unit)
+            F device node flags
+            c major compatible string
+            C full compatible string
+            r node reference count
+	Without any arguments prints full_name (same as %pOf)
+	The separator when using multiple arguments is '|'
+
+	Examples:
+
+	%pO	/foo/bar@0			- Node full name
+	%pOf	/foo/bar@0			- Same as above
+	%pOfp	/foo/bar@0|10			- Node full name + phandle
+	%pOfcF	/foo/bar@0|foo,device|--P-	- Node full name +
+	                                          major compatible string +
+						  node flags
+							D - dynamic
+							d - detached
+							P - Populated
+							B - Populated bus
+
+	Passed by reference
+
 If you add other %p extensions, please extend lib/test_printf.c with
 one or more test cases, if at all feasible.
 
-
 Thank you for your cooperation and attention.
 
 
diff --git a/MAINTAINERS b/MAINTAINERS
index 63cefa6..d49fd46 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -597,6 +597,12 @@ S:	Maintained
 F:	Documentation/i2c/busses/i2c-ali1563
 F:	drivers/i2c/busses/i2c-ali1563.c
 
+ALLWINNER SUN8I-EMAC ETHERNET DRIVER
+M:	Corentin Labbe <clabbe.montjoie@gmail.com>
+L:	netdev@vger.kernel.org
+S:	Maintained
+F:	drivers/net/ethernet/allwinner/sun8i-emac.c
+
 ALLWINNER SECURITY SYSTEM
 M:	Corentin Labbe <clabbe.montjoie@gmail.com>
 L:	linux-crypto@vger.kernel.org
@@ -2401,6 +2407,14 @@ W:	https://linuxtv.org
 S:	Supported
 F:	drivers/media/platform/sti/bdisp
 
+BEAGLEBONE CAPEMANAGER
+M:	Pantelis Antoniou <pantelis.antoniou@konsulko.com>
+S:	Maintained
+F:	drivers/misc/beaglebone-capemgr.c
+F:	Documentation/misc-devices/bone_capemgr.txt
+F:	Documentation/devicetree/bindings/misc/bone_capemgr.txt
+F:	Documentation/ABI/testing/sysfs-devices-platform-bone_capemgr
+
 BEFS FILE SYSTEM
 M:	Luis de Bethencourt <luisbg@osg.samsung.com>
 M:	Salah Triki <salah.triki@gmail.com>
@@ -9635,6 +9649,15 @@ F:	include/linux/pm_*
 F:	include/linux/powercap.h
 F:	drivers/powercap/
 
+POWER SEQUENCE LIBRARY
+M:	Peter Chen <Peter.Chen@nxp.com>
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/peter.chen/usb.git
+L:	linux-pm@vger.kernel.org
+S:	Maintained
+F:	Documentation/devicetree/bindings/power/pwrseq/
+F:	drivers/power/pwrseq/
+F:	include/linux/power/pwrseq.h/
+
 POWER SUPPLY CLASS/SUBSYSTEM and DRIVERS
 M:	Sebastian Reichel <sre@kernel.org>
 L:	linux-pm@vger.kernel.org
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index b5d529f..ced2e08 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -1496,8 +1496,7 @@ source kernel/Kconfig.preempt
 
 config HZ_FIXED
 	int
-	default 200 if ARCH_EBSA110 || ARCH_S3C24XX || \
-		ARCH_S5PV210 || ARCH_EXYNOS4
+	default 200 if ARCH_EBSA110
 	default 128 if SOC_AT91RM9200
 	default 0
 
diff --git a/arch/arm/Makefile b/arch/arm/Makefile
index 6be9ee1..6deeab7 100644
--- a/arch/arm/Makefile
+++ b/arch/arm/Makefile
@@ -13,7 +13,12 @@
 # Ensure linker flags are correct
 LDFLAGS		:=
 
-LDFLAGS_vmlinux	:=-p --no-undefined -X --pic-veneer
+GCCVERSIONISGTE5 := $(shell expr `$(HOSTCC) -dumpversion | cut -f1 -d.` \>= 5)
+ifeq  "$(GCCVERSIONISGTE5)" "1"
+LDFLAGS_vmlinux :=-p --no-undefined -X
+else
+LDFLAGS_vmlinux :=-p --no-undefined -X --pic-veneer
+endif
 ifeq ($(CONFIG_CPU_ENDIAN_BE8),y)
 LDFLAGS_vmlinux	+= --be8
 LDFLAGS_MODULE	+= --be8
diff --git a/arch/arm/boot/Makefile b/arch/arm/boot/Makefile
index 50f8d1b..2c30c44 100644
--- a/arch/arm/boot/Makefile
+++ b/arch/arm/boot/Makefile
@@ -29,6 +29,10 @@ export ZRELADDR INITRD_PHYS PARAMS_PHYS
 
 targets := Image zImage xipImage bootpImage uImage
 
+ifeq ($(CONFIG_OF_OVERLAY),y)
+DTC_FLAGS += -@
+endif
+
 ifeq ($(CONFIG_XIP_KERNEL),y)
 
 $(obj)/xipImage: vmlinux FORCE
diff --git a/arch/arm/boot/dts/Makefile b/arch/arm/boot/dts/Makefile
index 7037201..aabd736 100644
--- a/arch/arm/boot/dts/Makefile
+++ b/arch/arm/boot/dts/Makefile
@@ -1,11 +1,17 @@
 ifeq ($(CONFIG_OF),y)
 
+ifeq ($(CONFIG_OF_OVERLAY),y)
+DTC_FLAGS += -@
+endif
+
 dtb-$(CONFIG_ARCH_ALPINE) += \
 	alpine-db.dtb
 dtb-$(CONFIG_MACH_ARTPEC6) += \
 	artpec6-devboard.dtb
+
 dtb-$(CONFIG_MACH_ASM9260) += \
 	alphascale-asm9260-devkit.dtb
+
 # Keep at91 dtb files sorted alphabetically for each SoC
 dtb-$(CONFIG_SOC_AT91RM9200) += \
 	at91rm9200ek.dtb \
@@ -155,6 +161,7 @@ dtb-$(CONFIG_ARCH_EXYNOS5) += \
 	exynos5420-arndale-octa.dtb \
 	exynos5420-peach-pit.dtb \
 	exynos5420-smdk5420.dtb \
+	exynos5422-artik10-eval.dtb \
 	exynos5422-odroidxu3.dtb \
 	exynos5422-odroidxu3-lite.dtb \
 	exynos5422-odroidxu4.dtb \
@@ -366,6 +373,7 @@ dtb-$(CONFIG_SOC_IMX6Q) += \
 	imx6q-b650v3.dtb \
 	imx6q-b850v3.dtb \
 	imx6q-cm-fx6.dtb \
+	imx6q-ccimx6sbc.dtb \
 	imx6q-cubox-i.dtb \
 	imx6q-dfi-fs700-m60.dtb \
 	imx6q-dmo-edmqmx6.dtb \
@@ -419,6 +427,7 @@ dtb-$(CONFIG_SOC_IMX6SX) += \
 	imx6sx-sdb.dtb
 dtb-$(CONFIG_SOC_IMX6UL) += \
 	imx6ul-14x14-evk.dtb \
+	imx6ul-14x14-evk-ism43362-b81-evb.dtb \
 	imx6ul-geam-kit.dtb \
 	imx6ul-pico-hobbit.dtb \
 	imx6ul-tx6ul-0010.dtb \
@@ -548,6 +557,28 @@ dtb-$(CONFIG_SOC_AM33XX) += \
 	am335x-base0033.dtb \
 	am335x-bone.dtb \
 	am335x-boneblack.dtb \
+	am335x-boneblack-uboot.dtb \
+	am335x-sancloud-bbe.dtb \
+	am335x-boneblack-wireless-roboticscape.dtb \
+	am335x-boneblack-roboticscape.dtb \
+	am335x-boneblue.dtb \
+	am335x-boneblack-wireless-emmc-overlay.dtb \
+	am335x-boneblack-wireless.dtb \
+	am335x-bonegreen-wireless.dtb \
+	am335x-boneblack-audio.dtb \
+	am335x-boneblack-bbb-exp-r.dtb \
+	am335x-boneblack-bbb-exp-c.dtb \
+	am335x-boneblack-bbbmini.dtb \
+	am335x-boneblack-wl1835mod.dtb \
+	am335x-boneblack-cape-bone-argus.dtb \
+	am335x-bone-cape-bone-argus.dtb \
+	am335x-olimex-som.dtb \
+	am335x-abbbi.dtb \
+	am335x-bonegreen-overlay.dtb \
+	am335x-boneblack-overlay.dtb \
+	am335x-boneblack-nhdmi-overlay.dtb \
+	am335x-boneblack-hdmi-overlay.dtb \
+	am335x-boneblack-emmc-overlay.dtb \
 	am335x-bonegreen.dtb \
 	am335x-chiliboard.dtb \
 	am335x-cm-t335.dtb \
@@ -567,6 +598,7 @@ dtb-$(CONFIG_ARCH_OMAP4) += \
 	omap4-panda.dtb \
 	omap4-panda-a4.dtb \
 	omap4-panda-es.dtb \
+	omap4-panda-es-b3.dtb \
 	omap4-sdp.dtb \
 	omap4-sdp-es23plus.dtb \
 	omap4-var-dvk-om44.dtb \
diff --git b/arch/arm/boot/dts/am335x-abbbi.dts b/arch/arm/boot/dts/am335x-abbbi.dts
new file mode 100644
index 0000000..43efead
--- /dev/null
+++ b/arch/arm/boot/dts/am335x-abbbi.dts
@@ -0,0 +1,163 @@
+/*
+ * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/
+ * Copyright 2015 Konsulko Group
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+/dts-v1/;
+
+#include "am33xx.dtsi"
+#include "am335x-bone-common.dtsi"
+
+
+/ {
+	model = "Arrow BeagleBone Black Industrial";
+	compatible = "arrow,am335x-abbbi", "ti,am335x-bone", "ti,am33xx";
+};
+
+&ldo3_reg {
+	regulator-min-microvolt = <1800000>;
+	regulator-max-microvolt = <1800000>;
+	regulator-always-on;
+};
+
+&mmc1 {
+	vmmc-supply = <&vmmcsd_fixed>;
+};
+
+&mmc2 {
+	vmmc-supply = <&vmmcsd_fixed>;
+	pinctrl-names = "default";
+	pinctrl-0 = <&emmc_pins>;
+	bus-width = <8>;
+	status = "okay";
+};
+
+&am33xx_pinmux {
+	adi_hdmi_bbbi_pins: adi_hdmi_bbbi_pins {
+		pinctrl-single,pins = <
+			AM33XX_IOPAD(0x9b0, PIN_OUTPUT_PULLDOWN | MUX_MODE3)	/* xdma_event_intr0 */
+			AM33XX_IOPAD(0x8a0, PIN_OUTPUT | MUX_MODE0)		/* lcd_data0.lcd_data0 */
+			AM33XX_IOPAD(0x8a4, PIN_OUTPUT | MUX_MODE0)		/* lcd_data1.lcd_data1 */
+			AM33XX_IOPAD(0x8a8, PIN_OUTPUT | MUX_MODE0)		/* lcd_data2.lcd_data2 */
+			AM33XX_IOPAD(0x8ac, PIN_OUTPUT | MUX_MODE0)		/* lcd_data3.lcd_data3 */
+			AM33XX_IOPAD(0x8b0, PIN_OUTPUT | MUX_MODE0)		/* lcd_data4.lcd_data4 */
+			AM33XX_IOPAD(0x8b4, PIN_OUTPUT | MUX_MODE0)		/* lcd_data5.lcd_data5 */
+			AM33XX_IOPAD(0x8b8, PIN_OUTPUT | MUX_MODE0)		/* lcd_data6.lcd_data6 */
+			AM33XX_IOPAD(0x8bc, PIN_OUTPUT | MUX_MODE0)		/* lcd_data7.lcd_data7 */
+			AM33XX_IOPAD(0x8c0, PIN_OUTPUT | MUX_MODE0)		/* lcd_data8.lcd_data8 */
+			AM33XX_IOPAD(0x8c4, PIN_OUTPUT | MUX_MODE0)		/* lcd_data9.lcd_data9 */
+			AM33XX_IOPAD(0x8c8, PIN_OUTPUT | MUX_MODE0)		/* lcd_data10.lcd_data10 */
+			AM33XX_IOPAD(0x8cc, PIN_OUTPUT | MUX_MODE0)		/* lcd_data11.lcd_data11 */
+			AM33XX_IOPAD(0x8d0, PIN_OUTPUT | MUX_MODE0)		/* lcd_data12.lcd_data12 */
+			AM33XX_IOPAD(0x8d4, PIN_OUTPUT | MUX_MODE0)		/* lcd_data13.lcd_data13 */
+			AM33XX_IOPAD(0x8d8, PIN_OUTPUT | MUX_MODE0)		/* lcd_data14.lcd_data14 */
+			AM33XX_IOPAD(0x8dc, PIN_OUTPUT | MUX_MODE0)		/* lcd_data15.lcd_data15 */
+			AM33XX_IOPAD(0x8e0, PIN_OUTPUT_PULLDOWN | MUX_MODE0)	/* lcd_vsync.lcd_vsync */
+			AM33XX_IOPAD(0x8e4, PIN_OUTPUT_PULLDOWN | MUX_MODE0)	/* lcd_hsync.lcd_hsync */
+			AM33XX_IOPAD(0x8e8, PIN_OUTPUT_PULLDOWN | MUX_MODE0)	/* lcd_pclk.lcd_pclk */
+			AM33XX_IOPAD(0x8ec, PIN_OUTPUT_PULLDOWN | MUX_MODE0)	/* lcd_ac_bias_en.lcd_ac_bias_en */
+		>;
+	};
+	adi_hdmi_bbbi_off_pins: adi_hdmi_bbbi_off_pins {
+		pinctrl-single,pins = <
+			AM33XX_IOPAD(0x9b0, PIN_OUTPUT_PULLDOWN | MUX_MODE3)	/* xdma_event_intr0 */
+		>;
+	};
+
+	mcasp0_pins: mcasp0_pins {
+		pinctrl-single,pins = <
+			0x1ac (PIN_INPUT_PULLUP | MUX_MODE0)	/* mcasp0_ahclkx.mcasp0_ahclkx */
+			0x19c (PIN_OUTPUT_PULLDOWN | MUX_MODE2) /* mcasp0_ahclkr.mcasp0_axr2 */
+			0x194 (PIN_OUTPUT_PULLUP | MUX_MODE0)	/* mcasp0_fsx.mcasp0_fsx */
+			0x190 (PIN_OUTPUT_PULLDOWN | MUX_MODE0)	/* mcasp0_aclkx.mcasp0_aclkx */
+			0x06c (PIN_OUTPUT_PULLDOWN | MUX_MODE7)	/* gpmc_a11.GPIO1_27 */
+		>;
+	};
+
+	mcasp0_pins_sleep: mcasp0_pins_sleep {
+		pinctrl-single,pins = <
+			0x1ac (PIN_INPUT_PULLDOWN | MUX_MODE7)	/* mcasp0_ahclkx.mcasp0_ahclkx */
+			0x19c (PIN_INPUT_PULLDOWN | MUX_MODE7) /* mcasp0_ahclkr.mcasp0_axr2 */
+			0x194 (PIN_INPUT_PULLDOWN | MUX_MODE7)	/* mcasp0_fsx.mcasp0_fsx */
+			0x190 (PIN_INPUT_PULLDOWN | MUX_MODE7)	/* mcasp0_aclkx.mcasp0_aclkx */
+			0x06c (PIN_INPUT_PULLDOWN | MUX_MODE7)	/* gpmc_a11.GPIO1_27 */
+		>;
+	};
+};
+
+&lcdc {
+	status = "okay";
+	port {
+		lcdc_0: endpoint@0 {
+			remote-endpoint = <&hdmi_0>;
+		};
+	};
+};
+
+&i2c0 {
+	adv7511w {
+		compatible = "adi,adv7511w";
+		reg = <0x39>;
+		pinctrl-names = "default", "off";
+		pinctrl-0 = <&adi_hdmi_bbbi_pins>;
+		pinctrl-1 = <&adi_hdmi_bbbi_off_pins>;
+
+		port {
+			hdmi_0: endpoint@0 {
+				remote-endpoint = <&lcdc_0>;
+			};
+		};
+	};
+};
+
+&mcasp0	{
+	pinctrl-names = "default", "sleep";
+	pinctrl-0 = <&mcasp0_pins>;
+	pinctrl-1 = <&mcasp0_pins_sleep>;
+	status = "okay";
+	op-mode = <0>;	/* MCASP_IIS_MODE */
+	tdm-slots = <2>;
+	serial-dir = <	/* 0: INACTIVE, 1: TX, 2: RX */
+			0 0 1 0
+		>;
+	tx-num-evt = <1>;
+	rx-num-evt = <1>;
+};
+
+/ {
+	clk_mcasp0_fixed: clk_mcasp0_fixed {
+	      #clock-cells = <0>;
+	      compatible = "fixed-clock";
+	      clock-frequency = <24576000>;
+	};
+
+	clk_mcasp0: clk_mcasp0 {
+	      #clock-cells = <0>;
+	      compatible = "gpio-gate-clock";
+	      clocks = <&clk_mcasp0_fixed>;
+	      enable-gpios = <&gpio1 27 0>; /* BeagleBone Black Clk enable on GPIO1_27 */
+	};
+
+	hdmi_audio: hdmi_audio@0 {
+	       compatible = "linux,hdmi-audio";
+	       status = "okay";
+	};
+
+	sound {
+		compatible = "ti,beaglebone-black-audio";
+		ti,model = "TI BeagleBone Black";
+		ti,audio-codec = <&hdmi_audio>;
+		ti,mcasp-controller = <&mcasp0>;
+		ti,audio-routing =
+			"HDMI Out",	"TX";
+		clocks = <&clk_mcasp0>;
+		clock-names = "mclk";
+	};
+};
+
+&rtc {
+	system-power-controller;
+};
diff --git b/arch/arm/boot/dts/am335x-bone-argus.dtsi b/arch/arm/boot/dts/am335x-bone-argus.dtsi
new file mode 100644
index 0000000..21afad3
--- /dev/null
+++ b/arch/arm/boot/dts/am335x-bone-argus.dtsi
@@ -0,0 +1,109 @@
+/*
+ * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <dt-bindings/board/am335x-bbw-bbb-base.h>
+
+/ {
+	ocp {
+		P8_07_pinmux {
+			/* gpio2[2] */
+			status = "disabled";
+		};
+		P8_08_pinmux {
+			/* gpio2[3] */
+			status = "disabled";
+		};
+		P8_09_pinmux {
+			/* gpio2[5] */
+			status = "disabled";
+		};
+		P8_10_pinmux {
+			/* gpio2[4] */
+			status = "disabled";
+		};
+		P9_11_pinmux {
+			/* gpio0[30] */
+			status = "disabled";
+		};
+		P9_17_pinmux {
+			/* gpio0[5] */
+			status = "disabled";
+		};
+		P9_18_pinmux {
+			/* gpio0[4] */
+			status = "disabled";
+		};
+		P9_41_pinmux {
+			/* gpio0[20] */
+			status = "disabled";
+		};
+		P9_42_pinmux {
+			/* gpio0[7] */
+			status = "disabled";
+		};
+	};
+};
+
+/ {
+	argus-ups {
+		compatible = "argus-ups";
+		status = "okay";
+
+		pinctrl-names = "default";
+		pinctrl-0 = <&argus_ups_pins>; /* Refer to previous label */
+		/* This section communicates the gpio numbers to the driver module */
+		/* Note that gpio controllers appear to be numbered from 1-n here rather than 0-(n-1)????? */
+		gpios = <&gpio0 30 0>,  /* Request */
+			<&gpio0 5 0>,  	/* Acknowledge */
+			<&gpio0 4 0>,   /* Watchdog */
+			<&gpio2 2 0>, 	/* LED 1 Green */
+			<&gpio2 3 0>, 	/* LED 1 Red */
+			<&gpio2 5 0>, 	/* LED 2 Green */
+			<&gpio2 4 0>, 	/* LED 2 Red */
+			<&gpio0 20 0>,	/* General Output #1 */
+			<&gpio0 7 0>;	/* General Output #2 */
+		debug = <1>;
+		shutdown = <1>;
+	};
+};
+
+&am33xx_pinmux {
+	argus_ups_pins: pinmux_argus_ups_pins { /* Set up pinmux */
+		pinctrl-single,pins = <
+			0x070 (PIN_INPUT_PULLDOWN | MUX_MODE7) /* gpmc_wait0.gpio0_30 */
+			0x15c (PIN_OUTPUT_PULLUP | MUX_MODE7) /* spi0_cs0.gpio0_5 */
+			0x158 (PIN_OUTPUT_PULLUP | MUX_MODE7) /* spi0_d1.gpio0_4 */
+			0x090 (PIN_OUTPUT_PULLUP | MUX_MODE7) /* gpmc_advn_ale.gpio_2 */
+			0x094 (PIN_OUTPUT_PULLUP | MUX_MODE7) /* gpmc_oen_ren.gpio2_3 */
+			0x09c (PIN_OUTPUT_PULLUP | MUX_MODE7) /* gpmc_ben0_cle.gpio2_5 */
+			0x098 (PIN_OUTPUT_PULLUP | MUX_MODE7) /* gpmc_gpmc_wen.gpio2_4 */
+			0x1b4 (PIN_OUTPUT_PULLUP | MUX_MODE7) /* xdma_event_intr1.gpio0_20 */
+			0x164 (PIN_OUTPUT_PULLUP | MUX_MODE7) /* ecap0_in_pwm0_out.gpio0_7 */
+		>;
+	};
+
+	i2c2_pins: pinmux_i2c2_pins {
+		pinctrl-single,pins = <
+			BONE_P9_20 0x73 /* (SLEWCTRL_SLOW | PIN_INPUT_PULLUP | MUX_MODE3) uart1_ctsn.i2c2_sda */
+			BONE_P9_19 0x73 /* (SLEWCTRL_SLOW | PIN_INPUT_PULLUP | MUX_MODE3) uart1_rtsn.i2c2_scl */
+		>;
+	};
+};
+
+&i2c2 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&i2c2_pins>;
+
+	status = "okay";
+	clock-frequency = <100000>;
+
+	rtc@68 {
+		compatible = "maxim,ds1307";
+		reg = <0x68>;
+	};
+};
diff --git b/arch/arm/boot/dts/am335x-bone-cape-bone-argus.dts b/arch/arm/boot/dts/am335x-bone-cape-bone-argus.dts
new file mode 100644
index 0000000..82218f5
--- /dev/null
+++ b/arch/arm/boot/dts/am335x-bone-cape-bone-argus.dts
@@ -0,0 +1,28 @@
+/*
+ * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+/dts-v1/;
+
+#include "am33xx.dtsi"
+#include "am335x-bone-common-no-capemgr.dtsi"
+
+/ {
+	model = "TI AM335x BeagleBone";
+	compatible = "ti,am335x-bone", "ti,am33xx";
+};
+
+&ldo3_reg {
+	regulator-min-microvolt = <1800000>;
+	regulator-max-microvolt = <3300000>;
+	regulator-always-on;
+};
+
+&mmc1 {
+	vmmc-supply = <&ldo3_reg>;
+};
+
+#include "am335x-bone-argus.dtsi"
diff --git b/arch/arm/boot/dts/am335x-bone-common-no-capemgr.dtsi b/arch/arm/boot/dts/am335x-bone-common-no-capemgr.dtsi
new file mode 100644
index 0000000..56b9ccf
--- /dev/null
+++ b/arch/arm/boot/dts/am335x-bone-common-no-capemgr.dtsi
@@ -0,0 +1,362 @@
+/*
+ * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <dt-bindings/mfd/tps65217.h>
+
+/ {
+	cpus {
+		cpu@0 {
+			cpu0-supply = <&dcdc2_reg>;
+		};
+	};
+
+	memory@80000000 {
+		device_type = "memory";
+		reg = <0x80000000 0x10000000>; /* 256 MB */
+	};
+
+	chosen {
+		stdout-path = &uart0;
+	};
+
+	leds {
+		pinctrl-names = "default";
+		pinctrl-0 = <&user_leds_s0>;
+
+		compatible = "gpio-leds";
+
+		led2 {
+			label = "beaglebone:green:usr0";
+			gpios = <&gpio1 21 GPIO_ACTIVE_HIGH>;
+			linux,default-trigger = "heartbeat";
+			default-state = "off";
+		};
+
+		led3 {
+			label = "beaglebone:green:usr1";
+			gpios = <&gpio1 22 GPIO_ACTIVE_HIGH>;
+			linux,default-trigger = "mmc0";
+			default-state = "off";
+		};
+
+		led4 {
+			label = "beaglebone:green:usr2";
+			gpios = <&gpio1 23 GPIO_ACTIVE_HIGH>;
+			linux,default-trigger = "cpu0";
+			default-state = "off";
+		};
+
+		led5 {
+			label = "beaglebone:green:usr3";
+			gpios = <&gpio1 24 GPIO_ACTIVE_HIGH>;
+			linux,default-trigger = "mmc1";
+			default-state = "off";
+		};
+	};
+
+	vmmcsd_fixed: fixedregulator0 {
+		compatible = "regulator-fixed";
+		regulator-name = "vmmcsd_fixed";
+		regulator-min-microvolt = <3300000>;
+		regulator-max-microvolt = <3300000>;
+	};
+};
+
+&am33xx_pinmux {
+	user_leds_s0: user_leds_s0 {
+		pinctrl-single,pins = <
+			AM33XX_IOPAD(0x854, PIN_OUTPUT_PULLDOWN | MUX_MODE7)	/* gpmc_a5.gpio1_21 */
+			AM33XX_IOPAD(0x858, PIN_OUTPUT_PULLUP | MUX_MODE7)	/* gpmc_a6.gpio1_22 */
+			AM33XX_IOPAD(0x85c, PIN_OUTPUT_PULLDOWN | MUX_MODE7)	/* gpmc_a7.gpio1_23 */
+			AM33XX_IOPAD(0x860, PIN_OUTPUT_PULLUP | MUX_MODE7)	/* gpmc_a8.gpio1_24 */
+		>;
+	};
+
+	i2c0_pins: pinmux_i2c0_pins {
+		pinctrl-single,pins = <
+			AM33XX_IOPAD(0x988, PIN_INPUT_PULLUP | MUX_MODE0)	/* i2c0_sda.i2c0_sda */
+			AM33XX_IOPAD(0x98c, PIN_INPUT_PULLUP | MUX_MODE0)	/* i2c0_scl.i2c0_scl */
+		>;
+	};
+
+	i2c2_pins: pinmux_i2c2_pins {
+		pinctrl-single,pins = <
+			AM33XX_IOPAD(0x978, PIN_INPUT_PULLUP | MUX_MODE3)	/* uart1_ctsn.i2c2_sda */
+			AM33XX_IOPAD(0x97c, PIN_INPUT_PULLUP | MUX_MODE3)	/* uart1_rtsn.i2c2_scl */
+		>;
+	};
+
+	uart0_pins: pinmux_uart0_pins {
+		pinctrl-single,pins = <
+			AM33XX_IOPAD(0x970, PIN_INPUT_PULLUP | MUX_MODE0)	/* uart0_rxd.uart0_rxd */
+			AM33XX_IOPAD(0x974, PIN_OUTPUT_PULLDOWN | MUX_MODE0)	/* uart0_txd.uart0_txd */
+		>;
+	};
+
+	cpsw_default: cpsw_default {
+		pinctrl-single,pins = <
+			/* Slave 1 */
+			0x108 (PIN_INPUT | MUX_MODE0)		/* mii1_col.mii1_col */
+			0x10c (PIN_INPUT | MUX_MODE0)		/* mii1_crs.mii1_crs */
+			AM33XX_IOPAD(0x910, PIN_INPUT_PULLUP | MUX_MODE0)	/* mii1_rxerr.mii1_rxerr */
+			AM33XX_IOPAD(0x914, PIN_OUTPUT_PULLDOWN | MUX_MODE0)	/* mii1_txen.mii1_txen */
+			AM33XX_IOPAD(0x918, PIN_INPUT_PULLUP | MUX_MODE0)	/* mii1_rxdv.mii1_rxdv */
+			AM33XX_IOPAD(0x91c, PIN_OUTPUT_PULLDOWN | MUX_MODE0)	/* mii1_txd3.mii1_txd3 */
+			AM33XX_IOPAD(0x920, PIN_OUTPUT_PULLDOWN | MUX_MODE0)	/* mii1_txd2.mii1_txd2 */
+			AM33XX_IOPAD(0x924, PIN_OUTPUT_PULLDOWN | MUX_MODE0)	/* mii1_txd1.mii1_txd1 */
+			AM33XX_IOPAD(0x928, PIN_OUTPUT_PULLDOWN | MUX_MODE0)	/* mii1_txd0.mii1_txd0 */
+			AM33XX_IOPAD(0x92c, PIN_INPUT_PULLUP | MUX_MODE0)	/* mii1_txclk.mii1_txclk */
+			AM33XX_IOPAD(0x930, PIN_INPUT_PULLUP | MUX_MODE0)	/* mii1_rxclk.mii1_rxclk */
+			AM33XX_IOPAD(0x934, PIN_INPUT_PULLUP | MUX_MODE0)	/* mii1_rxd3.mii1_rxd3 */
+			AM33XX_IOPAD(0x938, PIN_INPUT_PULLUP | MUX_MODE0)	/* mii1_rxd2.mii1_rxd2 */
+			AM33XX_IOPAD(0x93c, PIN_INPUT_PULLUP | MUX_MODE0)	/* mii1_rxd1.mii1_rxd1 */
+			AM33XX_IOPAD(0x940, PIN_INPUT_PULLUP | MUX_MODE0)	/* mii1_rxd0.mii1_rxd0 */
+		>;
+	};
+
+	cpsw_sleep: cpsw_sleep {
+		pinctrl-single,pins = <
+			/* Slave 1 reset value */
+			0x108 (PIN_INPUT_PULLDOWN | MUX_MODE7)
+			0x10c (PIN_INPUT_PULLDOWN | MUX_MODE7)
+			AM33XX_IOPAD(0x910, PIN_INPUT_PULLDOWN | MUX_MODE7)
+			AM33XX_IOPAD(0x914, PIN_INPUT_PULLDOWN | MUX_MODE7)
+			AM33XX_IOPAD(0x918, PIN_INPUT_PULLDOWN | MUX_MODE7)
+			AM33XX_IOPAD(0x91c, PIN_INPUT_PULLDOWN | MUX_MODE7)
+			AM33XX_IOPAD(0x920, PIN_INPUT_PULLDOWN | MUX_MODE7)
+			AM33XX_IOPAD(0x924, PIN_INPUT_PULLDOWN | MUX_MODE7)
+			AM33XX_IOPAD(0x928, PIN_INPUT_PULLDOWN | MUX_MODE7)
+			AM33XX_IOPAD(0x92c, PIN_INPUT_PULLDOWN | MUX_MODE7)
+			AM33XX_IOPAD(0x930, PIN_INPUT_PULLDOWN | MUX_MODE7)
+			AM33XX_IOPAD(0x934, PIN_INPUT_PULLDOWN | MUX_MODE7)
+			AM33XX_IOPAD(0x938, PIN_INPUT_PULLDOWN | MUX_MODE7)
+			AM33XX_IOPAD(0x93c, PIN_INPUT_PULLDOWN | MUX_MODE7)
+			AM33XX_IOPAD(0x940, PIN_INPUT_PULLDOWN | MUX_MODE7)
+		>;
+	};
+
+	davinci_mdio_default: davinci_mdio_default {
+		pinctrl-single,pins = <
+			/* MDIO */
+			AM33XX_IOPAD(0x948, PIN_INPUT_PULLUP | SLEWCTRL_FAST | MUX_MODE0)	/* mdio_data.mdio_data */
+			AM33XX_IOPAD(0x94c, PIN_OUTPUT_PULLUP | MUX_MODE0)			/* mdio_clk.mdio_clk */
+		>;
+	};
+
+	davinci_mdio_sleep: davinci_mdio_sleep {
+		pinctrl-single,pins = <
+			/* MDIO reset value */
+			AM33XX_IOPAD(0x948, PIN_INPUT_PULLDOWN | MUX_MODE7)
+			AM33XX_IOPAD(0x94c, PIN_INPUT_PULLDOWN | MUX_MODE7)
+		>;
+	};
+
+	mmc1_pins: pinmux_mmc1_pins {
+		pinctrl-single,pins = <
+			AM33XX_IOPAD(0x960, PIN_INPUT | MUX_MODE7) /* GPIO0_6 */
+		>;
+	};
+
+	emmc_pins: pinmux_emmc_pins {
+		pinctrl-single,pins = <
+			AM33XX_IOPAD(0x880, PIN_INPUT_PULLUP | MUX_MODE2) /* gpmc_csn1.mmc1_clk */
+			AM33XX_IOPAD(0x884, PIN_INPUT_PULLUP | MUX_MODE2) /* gpmc_csn2.mmc1_cmd */
+			AM33XX_IOPAD(0x800, PIN_INPUT_PULLUP | MUX_MODE1) /* gpmc_ad0.mmc1_dat0 */
+			AM33XX_IOPAD(0x804, PIN_INPUT_PULLUP | MUX_MODE1) /* gpmc_ad1.mmc1_dat1 */
+			AM33XX_IOPAD(0x808, PIN_INPUT_PULLUP | MUX_MODE1) /* gpmc_ad2.mmc1_dat2 */
+			AM33XX_IOPAD(0x80c, PIN_INPUT_PULLUP | MUX_MODE1) /* gpmc_ad3.mmc1_dat3 */
+			AM33XX_IOPAD(0x810, PIN_INPUT_PULLUP | MUX_MODE1) /* gpmc_ad4.mmc1_dat4 */
+			AM33XX_IOPAD(0x814, PIN_INPUT_PULLUP | MUX_MODE1) /* gpmc_ad5.mmc1_dat5 */
+			AM33XX_IOPAD(0x818, PIN_INPUT_PULLUP | MUX_MODE1) /* gpmc_ad6.mmc1_dat6 */
+			AM33XX_IOPAD(0x81c, PIN_INPUT_PULLUP | MUX_MODE1) /* gpmc_ad7.mmc1_dat7 */
+		>;
+	};
+};
+
+&uart0 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&uart0_pins>;
+
+	status = "okay";
+};
+
+&usb {
+	status = "okay";
+};
+
+&usb_ctrl_mod {
+	status = "okay";
+};
+
+&usb0_phy {
+	status = "okay";
+};
+
+&usb1_phy {
+	status = "okay";
+};
+
+&usb0 {
+	status = "okay";
+	dr_mode = "peripheral";
+};
+
+&usb1 {
+	status = "okay";
+	dr_mode = "host";
+};
+
+&cppi41dma  {
+	status = "okay";
+};
+
+&i2c0 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&i2c0_pins>;
+
+	status = "okay";
+	clock-frequency = <400000>;
+
+	tps: tps@24 {
+		reg = <0x24>;
+	};
+
+	baseboard_eeprom: baseboard_eeprom@50 {
+		compatible = "at,24c256";
+		reg = <0x50>;
+
+		#address-cells = <1>;
+		#size-cells = <1>;
+		baseboard_data: baseboard_data@0 {
+			reg = <0 0x100>;
+		};
+	};
+};
+
+/include/ "tps65217.dtsi"
+
+&tps {
+	/*
+	 * Configure pmic to enter OFF-state instead of SLEEP-state ("RTC-only
+	 * mode") at poweroff.  Most BeagleBone versions do not support RTC-only
+	 * mode and risk hardware damage if this mode is entered.
+	 *
+	 * For details, see linux-omap mailing list May 2015 thread
+	 *	[PATCH] ARM: dts: am335x-bone* enable pmic-shutdown-controller
+	 * In particular, messages:
+	 *	http://www.spinics.net/lists/linux-omap/msg118585.html
+	 *	http://www.spinics.net/lists/linux-omap/msg118615.html
+	 *
+	 * You can override this later with
+	 *	&tps {  /delete-property/ ti,pmic-shutdown-controller;  }
+	 * if you want to use RTC-only mode and made sure you are not affected
+	 * by the hardware problems. (Tip: double-check by performing a current
+	 * measurement after shutdown: it should be less than 1 mA.)
+	 */
+
+	interrupts = <7>; /* NMI */
+	interrupt-parent = <&intc>;
+
+	ti,pmic-shutdown-controller;
+
+	charger {
+		interrupts = <TPS65217_IRQ_AC>, <TPS65217_IRQ_USB>;
+		interrupts-names = "AC", "USB";
+		status = "okay";
+	};
+
+	pwrbutton {
+		interrupts = <TPS65217_IRQ_PB>;
+		status = "okay";
+	};
+
+	regulators {
+		dcdc1_reg: regulator@0 {
+			regulator-name = "vdds_dpr";
+			regulator-always-on;
+		};
+
+		dcdc2_reg: regulator@1 {
+			/* VDD_MPU voltage limits 0.95V - 1.26V with +/-4% tolerance */
+			regulator-name = "vdd_mpu";
+			regulator-min-microvolt = <925000>;
+			regulator-max-microvolt = <1351500>;
+			regulator-boot-on;
+			regulator-always-on;
+		};
+
+		dcdc3_reg: regulator@2 {
+			/* VDD_CORE voltage limits 0.95V - 1.1V with +/-4% tolerance */
+			regulator-name = "vdd_core";
+			regulator-min-microvolt = <925000>;
+			regulator-max-microvolt = <1150000>;
+			regulator-boot-on;
+			regulator-always-on;
+		};
+
+		ldo1_reg: regulator@3 {
+			regulator-name = "vio,vrtc,vdds";
+			regulator-always-on;
+		};
+
+		ldo2_reg: regulator@4 {
+			regulator-name = "vdd_3v3aux";
+			regulator-always-on;
+		};
+
+		ldo3_reg: regulator@5 {
+			regulator-name = "vdd_1v8";
+			regulator-always-on;
+		};
+
+		ldo4_reg: regulator@6 {
+			regulator-name = "vdd_3v3a";
+			regulator-always-on;
+		};
+	};
+};
+
+&cpsw_emac0 {
+	phy_id = <&davinci_mdio>, <0>;
+	phy-mode = "mii";
+};
+
+&mac {
+	slaves = <1>;
+	pinctrl-names = "default", "sleep";
+	pinctrl-0 = <&cpsw_default>;
+	pinctrl-1 = <&cpsw_sleep>;
+	status = "okay";
+};
+
+&davinci_mdio {
+	pinctrl-names = "default", "sleep";
+	pinctrl-0 = <&davinci_mdio_default>;
+	pinctrl-1 = <&davinci_mdio_sleep>;
+	status = "okay";
+};
+
+&mmc1 {
+	status = "okay";
+	bus-width = <0x4>;
+	pinctrl-names = "default";
+	pinctrl-0 = <&mmc1_pins>;
+	cd-gpios = <&gpio0 6 GPIO_ACTIVE_LOW>;
+};
+
+&aes {
+	status = "okay";
+};
+
+&sham {
+	status = "okay";
+};
+
+&rtc {
+	system-power-controller;
+};
diff --git b/arch/arm/boot/dts/am335x-bone-common-universal-pins.dtsi b/arch/arm/boot/dts/am335x-bone-common-universal-pins.dtsi
new file mode 100644
index 0000000..e4d4971
--- /dev/null
+++ b/arch/arm/boot/dts/am335x-bone-common-universal-pins.dtsi
@@ -0,0 +1,941 @@
+/*
+ * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+&am33xx_pinmux {
+	/************************/
+	/* P8 Header            */
+	/************************/
+
+	/* P8_01                GND     */
+	/* P8_02                GND     */
+	/* P8_03 (ZCZ ball R9 ) emmc    */
+	/* P8_04 (ZCZ ball T9 ) emmc    */
+	/* P8_05 (ZCZ ball R8 ) emmc    */
+	/* P8_06 (ZCZ ball T8 ) emmc    */
+
+	/* P8_07 (ZCZ ball R7 ) */
+	P8_07_default_pin: pinmux_P8_07_default_pin {
+		pinctrl-single,pins = <0x090  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P8_07_gpio_pin: pinmux_P8_07_gpio_pin {
+		pinctrl-single,pins = <0x090  0x2F>; };     /* Mode 7, RxActive */
+	P8_07_gpio_pu_pin: pinmux_P8_07_gpio_pu_pin {
+		pinctrl-single,pins = <0x090  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P8_07_gpio_pd_pin: pinmux_P8_07_gpio_pd_pin {
+		pinctrl-single,pins = <0x090  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_07_timer_pin: pinmux_P8_07_timer_pin {
+		pinctrl-single,pins = <0x090  0x32>; };     /* Mode 2, Pull-Up, RxActive */
+
+	/* P8_08 (ZCZ ball T7 ) */
+	P8_08_default_pin: pinmux_P8_08_default_pin {
+		pinctrl-single,pins = <0x094  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P8_08_gpio_pin: pinmux_P8_08_gpio_pin {
+		pinctrl-single,pins = <0x094  0x2F>; };     /* Mode 7, RxActive */
+	P8_08_gpio_pu_pin: pinmux_P8_08_gpio_pu_pin {
+		pinctrl-single,pins = <0x094  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P8_08_gpio_pd_pin: pinmux_P8_08_gpio_pd_pin {
+		pinctrl-single,pins = <0x094  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_08_timer_pin: pinmux_P8_08_timer_pin {
+		pinctrl-single,pins = <0x094  0x32>; };     /* Mode 2, Pull-Up, RxActive */
+
+	/* P8_09 (ZCZ ball T6 ) */
+	P8_09_default_pin: pinmux_P8_09_default_pin {
+		pinctrl-single,pins = <0x09c  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P8_09_gpio_pin: pinmux_P8_09_gpio_pin {
+		pinctrl-single,pins = <0x09c  0x2F>; };     /* Mode 7, RxActive */
+	P8_09_gpio_pu_pin: pinmux_P8_09_gpio_pu_pin {
+		pinctrl-single,pins = <0x09c  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P8_09_gpio_pd_pin: pinmux_P8_09_gpio_pd_pin {
+		pinctrl-single,pins = <0x09c  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_09_timer_pin: pinmux_P8_09_timer_pin {
+		pinctrl-single,pins = <0x09c  0x32>; };     /* Mode 2, Pull-Up, RxActive */
+
+	/* P8_10 (ZCZ ball U6 ) */
+	P8_10_default_pin: pinmux_P8_10_default_pin {
+		pinctrl-single,pins = <0x098  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P8_10_gpio_pin: pinmux_P8_10_gpio_pin {
+		pinctrl-single,pins = <0x098  0x2F>; };     /* Mode 7, RxActive */
+	P8_10_gpio_pu_pin: pinmux_P8_10_gpio_pu_pin {
+		pinctrl-single,pins = <0x098  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P8_10_gpio_pd_pin: pinmux_P8_10_gpio_pd_pin {
+		pinctrl-single,pins = <0x098  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_10_timer_pin: pinmux_P8_10_timer_pin {
+		pinctrl-single,pins = <0x098  0x32>; };     /* Mode 2, Pull-Up, RxActive */
+
+	/* P8_11 (ZCZ ball R12) */
+	P8_11_default_pin: pinmux_P8_11_default_pin {
+		pinctrl-single,pins = <0x034  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_11_gpio_pin: pinmux_P8_11_gpio_pin {
+		pinctrl-single,pins = <0x034  0x2F>; };     /* Mode 7, RxActive */
+	P8_11_gpio_pu_pin: pinmux_P8_11_gpio_pu_pin {
+		pinctrl-single,pins = <0x034  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P8_11_gpio_pd_pin: pinmux_P8_11_gpio_pd_pin {
+		pinctrl-single,pins = <0x034  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_11_pruout_pin: pinmux_P8_11_pruout_pin {
+		pinctrl-single,pins = <0x034  0x26>; };     /* Mode 6, Pull-Down, RxActive */
+	P8_11_qep_pin: pinmux_P8_11_qep_pin {
+		pinctrl-single,pins = <0x034  0x24>; };     /* Mode 4, Pull-Down, RxActive */
+
+	/* P8_12 (ZCZ ball T12) */
+	P8_12_default_pin: pinmux_P8_12_default_pin {
+		pinctrl-single,pins = <0x030  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_12_gpio_pin: pinmux_P8_12_gpio_pin {
+		pinctrl-single,pins = <0x030  0x2F>; };     /* Mode 7, RxActive */
+	P8_12_gpio_pu_pin: pinmux_P8_12_gpio_pu_pin {
+		pinctrl-single,pins = <0x030  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P8_12_gpio_pd_pin: pinmux_P8_12_gpio_pd_pin {
+		pinctrl-single,pins = <0x030  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_12_pruout_pin: pinmux_P8_12_pruout_pin {
+		pinctrl-single,pins = <0x030  0x26>; };     /* Mode 6, Pull-Down, RxActive */
+	P8_12_qep_pin: pinmux_P8_12_qep_pin {
+		pinctrl-single,pins = <0x030  0x24>; };     /* Mode 4, Pull-Down, RxActive */
+
+	/* P8_13 (ZCZ ball T10) */
+	P8_13_default_pin: pinmux_P8_13_default_pin {
+		pinctrl-single,pins = <0x024  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_13_gpio_pin: pinmux_P8_13_gpio_pin {
+		pinctrl-single,pins = <0x024  0x2F>; };     /* Mode 7, RxActive */
+	P8_13_gpio_pu_pin: pinmux_P8_13_gpio_pu_pin {
+		pinctrl-single,pins = <0x024  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P8_13_gpio_pd_pin: pinmux_P8_13_gpio_pd_pin {
+		pinctrl-single,pins = <0x024  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_13_pwm_pin: pinmux_P8_13_pwm_pin {
+		pinctrl-single,pins = <0x024  0x24>; };     /* Mode 4, Pull-Down, RxActive */
+
+	/* P8_14 (ZCZ ball T11) */
+	P8_14_default_pin: pinmux_P8_14_default_pin {
+		pinctrl-single,pins = <0x028  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_14_gpio_pin: pinmux_P8_14_gpio_pin {
+		pinctrl-single,pins = <0x028  0x2F>; };     /* Mode 7, RxActive */
+	P8_14_gpio_pu_pin: pinmux_P8_14_gpio_pu_pin {
+		pinctrl-single,pins = <0x028  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P8_14_gpio_pd_pin: pinmux_P8_14_gpio_pd_pin {
+		pinctrl-single,pins = <0x028  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_14_pwm_pin: pinmux_P8_14_pwm_pin {
+		pinctrl-single,pins = <0x028  0x24>; };     /* Mode 4, Pull-Down, RxActive */
+
+	/* P8_15 (ZCZ ball U13) */
+	P8_15_default_pin: pinmux_P8_15_default_pin {
+		pinctrl-single,pins = <0x03c  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_15_gpio_pin: pinmux_P8_15_gpio_pin {
+		pinctrl-single,pins = <0x03c  0x2F>; };     /* Mode 7, RxActive */
+	P8_15_gpio_pu_pin: pinmux_P8_15_gpio_pu_pin {
+		pinctrl-single,pins = <0x03c  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P8_15_gpio_pd_pin: pinmux_P8_15_gpio_pd_pin {
+		pinctrl-single,pins = <0x03c  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_15_pruin_pin: pinmux_P8_15_pruin_pin {
+		pinctrl-single,pins = <0x03c  0x26>; };     /* Mode 6, Pull-Down, RxActive */
+	P8_15_qep_pin: pinmux_P8_15_qep_pin {
+		pinctrl-single,pins = <0x03c  0x24>; };     /* Mode 4, Pull-Down, RxActive */
+
+	/* P8_16 (ZCZ ball V13) */
+	P8_16_default_pin: pinmux_P8_16_default_pin {
+		pinctrl-single,pins = <0x038  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_16_gpio_pin: pinmux_P8_16_gpio_pin {
+		pinctrl-single,pins = <0x038  0x2F>; };     /* Mode 7, RxActive */
+	P8_16_gpio_pu_pin: pinmux_P8_16_gpio_pu_pin {
+		pinctrl-single,pins = <0x038  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P8_16_gpio_pd_pin: pinmux_P8_16_gpio_pd_pin {
+		pinctrl-single,pins = <0x038  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_16_pruin_pin: pinmux_P8_16_pruin_pin {
+		pinctrl-single,pins = <0x038  0x26>; };     /* Mode 6, Pull-Down, RxActive */
+	P8_16_qep_pin: pinmux_P8_16_qep_pin {
+		pinctrl-single,pins = <0x038  0x24>; };     /* Mode 4, Pull-Down, RxActive */
+
+	/* P8_17 (ZCZ ball U12) */
+	P8_17_default_pin: pinmux_P8_17_default_pin {
+		pinctrl-single,pins = <0x02c  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_17_gpio_pin: pinmux_P8_17_gpio_pin {
+		pinctrl-single,pins = <0x02c  0x2F>; };     /* Mode 7, RxActive */
+	P8_17_gpio_pu_pin: pinmux_P8_17_gpio_pu_pin {
+		pinctrl-single,pins = <0x02c  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P8_17_gpio_pd_pin: pinmux_P8_17_gpio_pd_pin {
+		pinctrl-single,pins = <0x02c  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_17_pwm_pin: pinmux_P8_17_pwm_pin {
+		pinctrl-single,pins = <0x02c  0x24>; };     /* Mode 4, Pull-Down, RxActive */
+
+	/* P8_18 (ZCZ ball V12) */
+	P8_18_default_pin: pinmux_P8_18_default_pin {
+		pinctrl-single,pins = <0x08c  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_18_gpio_pin: pinmux_P8_18_gpio_pin {
+		pinctrl-single,pins = <0x08c  0x2F>; };     /* Mode 7, RxActive */
+	P8_18_gpio_pu_pin: pinmux_P8_18_gpio_pu_pin {
+		pinctrl-single,pins = <0x08c  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P8_18_gpio_pd_pin: pinmux_P8_18_gpio_pd_pin {
+		pinctrl-single,pins = <0x08c  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+
+	/* P8_19 (ZCZ ball U10) */
+	P8_19_default_pin: pinmux_P8_19_default_pin {
+		pinctrl-single,pins = <0x020  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_19_gpio_pin: pinmux_P8_19_gpio_pin {
+		pinctrl-single,pins = <0x020  0x2F>; };     /* Mode 7, RxActive */
+	P8_19_gpio_pu_pin: pinmux_P8_19_gpio_pu_pin {
+		pinctrl-single,pins = <0x020  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P8_19_gpio_pd_pin: pinmux_P8_19_gpio_pd_pin {
+		pinctrl-single,pins = <0x020  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_19_pwm_pin: pinmux_P8_19_pwm_pin {
+		pinctrl-single,pins = <0x020  0x24>; };     /* Mode 4, Pull-Down, RxActive */
+
+	/* P8_20 (ZCZ ball V9 ) emmc    */
+	/* P8_21 (ZCZ ball U9 ) emmc    */
+	/* P8_22 (ZCZ ball V8 ) emmc    */
+	/* P8_23 (ZCZ ball U8 ) emmc    */
+	/* P8_24 (ZCZ ball V7 ) emmc    */
+	/* P8_25 (ZCZ ball U7 ) emmc    */
+
+	/* P8_26 (ZCZ ball V6 ) */
+	P8_26_default_pin: pinmux_P8_26_default_pin {
+		pinctrl-single,pins = <0x07c  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P8_26_gpio_pin: pinmux_P8_26_gpio_pin {
+		pinctrl-single,pins = <0x07c  0x2F>; };     /* Mode 7, RxActive */
+	P8_26_gpio_pu_pin: pinmux_P8_26_gpio_pu_pin {
+		pinctrl-single,pins = <0x07c  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P8_26_gpio_pd_pin: pinmux_P8_26_gpio_pd_pin {
+		pinctrl-single,pins = <0x07c  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+
+	/* P8_27 (ZCZ ball U5 ) hdmi    */
+	P8_27_default_pin: pinmux_P8_27_default_pin {
+		pinctrl-single,pins = <0x0e0  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_27_gpio_pin: pinmux_P8_27_gpio_pin {
+		pinctrl-single,pins = <0x0e0  0x2F>; };     /* Mode 7, RxActive */
+	P8_27_gpio_pu_pin: pinmux_P8_27_gpio_pu_pin {
+		pinctrl-single,pins = <0x0e0  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P8_27_gpio_pd_pin: pinmux_P8_27_gpio_pd_pin {
+		pinctrl-single,pins = <0x0e0  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_27_pruout_pin: pinmux_P8_27_pruout_pin {
+		pinctrl-single,pins = <0x0e0  0x05>; };     /* Mode 5, Pull-Down*/
+	P8_27_pruin_pin: pinmux_P8_27_pruin_pin {
+		pinctrl-single,pins = <0x0e0  0x26>; };     /* Mode 6, Pull-Down, RxActive */
+	P8_27_hdmi_pin: pinmux_P8_27_hdmi_pin {
+		pinctrl-single,pins = <0x0e0  0x00>; };     /* lcd_vsync.lcd_vsync, OMAP_MUX_MODE0 | AM33XX_PIN_OUTPUT */
+
+	/* P8_28 (ZCZ ball V5 ) hdmi    */
+	P8_28_default_pin: pinmux_P8_28_default_pin {
+		pinctrl-single,pins = <0x0e8  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_28_gpio_pin: pinmux_P8_28_gpio_pin {
+		pinctrl-single,pins = <0x0e8  0x2F>; };     /* Mode 7, RxActive */
+	P8_28_gpio_pu_pin: pinmux_P8_28_gpio_pu_pin {
+		pinctrl-single,pins = <0x0e8  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P8_28_gpio_pd_pin: pinmux_P8_28_gpio_pd_pin {
+		pinctrl-single,pins = <0x0e8  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_28_pruout_pin: pinmux_P8_28_pruout_pin {
+		pinctrl-single,pins = <0x0e8  0x05>; };     /* Mode 5, Pull-Down */
+	P8_28_pruin_pin: pinmux_P8_28_pruin_pin {
+		pinctrl-single,pins = <0x0e8  0x26>; };     /* Mode 6, Pull-Down, RxActive */
+	P8_28_hdmi_pin: pinmux_P8_28_hdmi_pin {
+		pinctrl-single,pins = <0x0e8  0x00>; };     /* lcd_pclk.lcd_pclk, OMAP_MUX_MODE0 | AM33XX_PIN_OUTPUT */
+
+	/* P8_29 (ZCZ ball R5 ) hdmi    */
+	P8_29_default_pin: pinmux_P8_29_default_pin {
+		pinctrl-single,pins = <0x0e4  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_29_gpio_pin: pinmux_P8_29_gpio_pin {
+		pinctrl-single,pins = <0x0e4  0x2F>; };     /* Mode 7, RxActive */
+	P8_29_gpio_pu_pin: pinmux_P8_29_gpio_pu_pin {
+		pinctrl-single,pins = <0x0e4  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P8_29_gpio_pd_pin: pinmux_P8_29_gpio_pd_pin {
+		pinctrl-single,pins = <0x0e4  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_29_pruout_pin: pinmux_P8_29_pruout_pin {
+		pinctrl-single,pins = <0x0e4  0x05>; };     /* Mode 5, Pull-Down*/
+	P8_29_pruin_pin: pinmux_P8_29_pruin_pin {
+		pinctrl-single,pins = <0x0e4  0x26>; };     /* Mode 6, Pull-Down, RxActive */
+	P8_29_hdmi_pin: pinmux_P8_29_hdmi_pin {
+		pinctrl-single,pins = <0x0e4  0x00>; };     /* lcd_hsync.lcd_hsync, OMAP_MUX_MODE0 | AM33XX_PIN_OUTPUT */
+
+	/* P8_30 (ZCZ ball R6 ) hdmi    */
+	P8_30_default_pin: pinmux_P8_30_default_pin {
+		pinctrl-single,pins = <0x0ec  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_30_gpio_pin: pinmux_P8_30_gpio_pin {
+		pinctrl-single,pins = <0x0ec  0x2F>; };     /* Mode 7, RxActive */
+	P8_30_gpio_pu_pin: pinmux_P8_30_gpio_pu_pin {
+		pinctrl-single,pins = <0x0ec  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P8_30_gpio_pd_pin: pinmux_P8_30_gpio_pd_pin {
+		pinctrl-single,pins = <0x0ec  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_30_pruout_pin: pinmux_P8_30_pruout_pin {
+		pinctrl-single,pins = <0x0ec  0x05>; };     /* Mode 5, Pull-Down*/
+	P8_30_pruin_pin: pinmux_P8_30_pruin_pin {
+		pinctrl-single,pins = <0x0ec  0x26>; };     /* Mode 6, Pull-Down, RxActive */
+	P8_30_hdmi_pin: pinmux_P8_30_hdmi_pin {
+		pinctrl-single,pins = <0x0ec  0x00>; };     /* lcd_ac_bias_en.lcd_ac_bias_en, OMAP_MUX_MODE0 | AM33XX_PIN_OUTPUT */
+
+	/* P8_31 (ZCZ ball V4 ) hdmi    */
+	P8_31_default_pin: pinmux_P8_31_default_pin {
+		pinctrl-single,pins = <0x0d8  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_31_gpio_pin: pinmux_P8_31_gpio_pin {
+		pinctrl-single,pins = <0x0d8  0x2F>; };     /* Mode 7, RxActive */
+	P8_31_gpio_pu_pin: pinmux_P8_31_gpio_pu_pin {
+		pinctrl-single,pins = <0x0d8  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P8_31_gpio_pd_pin: pinmux_P8_31_gpio_pd_pin {
+		pinctrl-single,pins = <0x0d8  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_31_uart_pin: pinmux_P8_31_uart_pin {
+		pinctrl-single,pins = <0x0d8  0x24>; };     /* Mode 4, Pull-Down, RxActive */
+	P8_31_hdmi_pin: pinmux_P8_31_hdmi_pin {
+		pinctrl-single,pins = <0x0d8  0x08>; };     /* lcd_data14.lcd_data14, OMAP_MUX_MODE0 | AM33XX_PIN_OUTPUT | AM33XX_PULL_DISA */
+
+	/* P8_32 (ZCZ ball T5 ) hdmi    */
+	P8_32_default_pin: pinmux_P8_32_default_pin {
+		pinctrl-single,pins = <0x0dc  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_32_gpio_pin: pinmux_P8_32_gpio_pin {
+		pinctrl-single,pins = <0x0dc  0x2F>; };     /* Mode 7, RxActive */
+	P8_32_gpio_pu_pin: pinmux_P8_32_gpio_pu_pin {
+		pinctrl-single,pins = <0x0dc  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P8_32_gpio_pd_pin: pinmux_P8_32_gpio_pd_pin {
+		pinctrl-single,pins = <0x0dc  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_32_uart_pin: pinmux_P8_32_uart_pin {
+		pinctrl-single,pins = <0x0dc  0x26>; };     /* Mode 6, Pull-Down, RxActive */
+	P8_32_hdmi_pin: pinmux_P8_32_hdmi_pin {
+		pinctrl-single,pins = <0x0dc  0x08>; };     /* lcd_data15.lcd_data15, OMAP_MUX_MODE0 | AM33XX_PIN_OUTPUT | AM33XX_PULL_DISA */
+
+	/* P8_33 (ZCZ ball V3 ) hdmi    */
+	P8_33_default_pin: pinmux_P8_33_default_pin {
+		pinctrl-single,pins = <0x0d4  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_33_gpio_pin: pinmux_P8_33_gpio_pin {
+		pinctrl-single,pins = <0x0d4  0x2F>; };     /* Mode 7, RxActive */
+	P8_33_gpio_pu_pin: pinmux_P8_33_gpio_pu_pin {
+		pinctrl-single,pins = <0x0d4  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P8_33_gpio_pd_pin: pinmux_P8_33_gpio_pd_pin {
+		pinctrl-single,pins = <0x0d4  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_33_hdmi_pin: pinmux_P8_33_hdmi_pin {
+		pinctrl-single,pins = <0x0d4  0x08>; };     /* lcd_data13.lcd_data13, OMAP_MUX_MODE0 | AM33XX_PIN_OUTPUT | AM33XX_PULL_DISA */
+	P8_33_qep_pin: pinmux_P8_33_qep_pin {
+		pinctrl-single,pins = <0x0d4  0x22>; };     /* Mode 2, Pull-Down, RxActive */
+
+	/* P8_34 (ZCZ ball U4 ) hdmi    */
+	P8_34_default_pin: pinmux_P8_34_default_pin {
+		pinctrl-single,pins = <0x0cc  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_34_gpio_pin: pinmux_P8_34_gpio_pin {
+		pinctrl-single,pins = <0x0cc  0x2F>; };     /* Mode 7, RxActive */
+	P8_34_gpio_pu_pin: pinmux_P8_34_gpio_pu_pin {
+		pinctrl-single,pins = <0x0cc  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P8_34_gpio_pd_pin: pinmux_P8_34_gpio_pd_pin {
+		pinctrl-single,pins = <0x0cc  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_34_pwm_pin: pinmux_P8_34_pwm_pin {
+		pinctrl-single,pins = <0x0cc  0x22>; };     /* Mode 2, Pull-Down, RxActive */
+	P8_34_hdmi_pin: pinmux_P8_34_hdmi_pin {
+		pinctrl-single,pins = <0x0cc  0x08>; };     /* lcd_data11.lcd_data11, OMAP_MUX_MODE0 | AM33XX_PIN_OUTPUT | AM33XX_PULL_DISA */
+
+	/* P8_35 (ZCZ ball V2 ) hdmi    */
+	P8_35_default_pin: pinmux_P8_35_default_pin {
+		pinctrl-single,pins = <0x0d0  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_35_gpio_pin: pinmux_P8_35_gpio_pin {
+		pinctrl-single,pins = <0x0d0  0x2F>; };     /* Mode 7, RxActive */
+	P8_35_gpio_pu_pin: pinmux_P8_35_gpio_pu_pin {
+		pinctrl-single,pins = <0x0d0  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P8_35_gpio_pd_pin: pinmux_P8_35_gpio_pd_pin {
+		pinctrl-single,pins = <0x0d0  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_35_hdmi_pin: pinmux_P8_35_hdmi_pin {
+		pinctrl-single,pins = <0x0d0  0x08>; };     /* lcd_data12.lcd_data12, OMAP_MUX_MODE0 | AM33XX_PIN_OUTPUT | AM33XX_PULL_DISA */
+	P8_35_qep_pin: pinmux_P8_35_qep_pin {
+		pinctrl-single,pins = <0x0d0  0x22>; };     /* Mode 2, Pull-Down, RxActive */
+
+	/* P8_36 (ZCZ ball U3 ) hdmi    */
+	P8_36_default_pin: pinmux_P8_36_default_pin {
+		pinctrl-single,pins = <0x0c8  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_36_gpio_pin: pinmux_P8_36_gpio_pin {
+		pinctrl-single,pins = <0x0c8  0x2F>; };     /* Mode 7, RxActive */
+	P8_36_gpio_pu_pin: pinmux_P8_36_gpio_pu_pin {
+		pinctrl-single,pins = <0x0c8  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P8_36_gpio_pd_pin: pinmux_P8_36_gpio_pd_pin {
+		pinctrl-single,pins = <0x0c8  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_36_pwm_pin: pinmux_P8_36_pwm_pin {
+		pinctrl-single,pins = <0x0c8  0x22>; };     /* Mode 2, Pull-Down, RxActive */
+	P8_36_hdmi_pin: pinmux_P8_36_hdmi_pin {
+		pinctrl-single,pins = <0x0c8  0x08>; };     /* lcd_data10.lcd_data10, OMAP_MUX_MODE0 | AM33XX_PIN_OUTPUT | AM33XX_PULL_DISA */
+
+	/* P8_37 (ZCZ ball U1 ) hdmi    */
+	P8_37_default_pin: pinmux_P8_37_default_pin {
+		pinctrl-single,pins = <0x0c0  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_37_gpio_pin: pinmux_P8_37_gpio_pin {
+		pinctrl-single,pins = <0x0c0  0x2F>; };     /* Mode 7, RxActive */
+	P8_37_gpio_pu_pin: pinmux_P8_37_gpio_pu_pin {
+		pinctrl-single,pins = <0x0c0  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P8_37_gpio_pd_pin: pinmux_P8_37_gpio_pd_pin {
+		pinctrl-single,pins = <0x0c0  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_37_uart_pin: pinmux_P8_37_uart_pin {
+		pinctrl-single,pins = <0x0c0  0x04>; };     /* Mode 4, Pull-Down*/
+	P8_37_pwm_pin: pinmux_P8_37_pwm_pin {
+		pinctrl-single,pins = <0x0c0  0x02>; };     /* Mode 2, Pull-Down*/
+	P8_37_hdmi_pin: pinmux_P8_37_hdmi_pin {
+		pinctrl-single,pins = <0x0c0  0x08>; };     /* lcd_data8.lcd_data8, OMAP_MUX_MODE0 | AM33XX_PIN_OUTPUT | AM33XX_PULL_DISA */
+
+
+	/* P8_38 (ZCZ ball U2 ) hdmi    */
+	P8_38_default_pin: pinmux_P8_38_default_pin {
+		pinctrl-single,pins = <0x0c4  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_38_gpio_pin: pinmux_P8_38_gpio_pin {
+		pinctrl-single,pins = <0x0c4  0x2F>; };     /* Mode 7, RxActive */
+	P8_38_gpio_pu_pin: pinmux_P8_38_gpio_pu_pin {
+		pinctrl-single,pins = <0x0c4  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P8_38_gpio_pd_pin: pinmux_P8_38_gpio_pd_pin {
+		pinctrl-single,pins = <0x0c4  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_38_uart_pin: pinmux_P8_38_uart_pin {
+		pinctrl-single,pins = <0x0c4  0x24>; };     /* Mode 4, Pull-Down, RxActive */
+	P8_38_pwm_pin: pinmux_P8_38_pwm_pin {
+		pinctrl-single,pins = <0x0c4  0x22>; };     /* Mode 2, Pull-Down, RxActive */
+	P8_38_hdmi_pin: pinmux_P8_38_hdmi_pin {
+		pinctrl-single,pins = <0x0c4  0x08>; };     /* lcd_data9.lcd_data9, OMAP_MUX_MODE0 | AM33XX_PIN_OUTPUT | AM33XX_PULL_DISA */
+
+
+	/* P8_39 (ZCZ ball T3 ) hdmi    */
+	P8_39_default_pin: pinmux_P8_39_default_pin {
+		pinctrl-single,pins = <0x0b8  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_39_gpio_pin: pinmux_P8_39_gpio_pin {
+		pinctrl-single,pins = <0x0b8  0x2F>; };     /* Mode 7, RxActive */
+	P8_39_gpio_pu_pin: pinmux_P8_39_gpio_pu_pin {
+		pinctrl-single,pins = <0x0b8  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P8_39_gpio_pd_pin: pinmux_P8_39_gpio_pd_pin {
+		pinctrl-single,pins = <0x0b8  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_39_pruout_pin: pinmux_P8_39_pruout_pin {
+		pinctrl-single,pins = <0x0b8  0x05>; };     /* Mode 5, Pull-Down*/
+	P8_39_pruin_pin: pinmux_P8_39_pruin_pin {
+		pinctrl-single,pins = <0x0b8  0x26>; };     /* Mode 6, Pull-Down, RxActive */
+	P8_39_hdmi_pin: pinmux_P8_39_hdmi_pin {
+		pinctrl-single,pins = <0x0b8  0x08>; };     /* lcd_data6.lcd_data6, OMAP_MUX_MODE0 | AM33XX_PIN_OUTPUT | AM33XX_PULL_DISA */
+
+	/* P8_40 (ZCZ ball T4 ) hdmi    */
+	P8_40_default_pin: pinmux_P8_40_default_pin {
+		pinctrl-single,pins = <0x0bc  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_40_gpio_pin: pinmux_P8_40_gpio_pin {
+		pinctrl-single,pins = <0x0bc  0x2F>; };     /* Mode 7, RxActive */
+	P8_40_gpio_pu_pin: pinmux_P8_40_gpio_pu_pin {
+		pinctrl-single,pins = <0x0bc  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P8_40_gpio_pd_pin: pinmux_P8_40_gpio_pd_pin {
+		pinctrl-single,pins = <0x0bc  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_40_pruout_pin: pinmux_P8_40_pruout_pin {
+		pinctrl-single,pins = <0x0bc  0x05>; };     /* Mode 5, Pull-Down*/
+	P8_40_pruin_pin: pinmux_P8_40_pruin_pin {
+		pinctrl-single,pins = <0x0bc  0x26>; };     /* Mode 6, Pull-Down, RxActive */
+	P8_40_hdmi_pin: pinmux_P8_40_hdmi_pin {
+		pinctrl-single,pins = <0x0bc  0x08>; };     /* lcd_data7.lcd_data7, OMAP_MUX_MODE0 | AM33XX_PIN_OUTPUT | AM33XX_PULL_DISA */
+
+	/* P8_41 (ZCZ ball T1 ) hdmi    */
+	P8_41_default_pin: pinmux_P8_41_default_pin {
+		pinctrl-single,pins = <0x0b0  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_41_gpio_pin: pinmux_P8_41_gpio_pin {
+		pinctrl-single,pins = <0x0b0  0x2F>; };     /* Mode 7, RxActive */
+	P8_41_gpio_pu_pin: pinmux_P8_41_gpio_pu_pin {
+		pinctrl-single,pins = <0x0b0  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P8_41_gpio_pd_pin: pinmux_P8_41_gpio_pd_pin {
+		pinctrl-single,pins = <0x0b0  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_41_pruout_pin: pinmux_P8_41_pruout_pin {
+		pinctrl-single,pins = <0x0b0  0x05>; };     /* Mode 5, Pull-Down*/
+	P8_41_pruin_pin: pinmux_P8_41_pruin_pin {
+		pinctrl-single,pins = <0x0b0  0x26>; };     /* Mode 6, Pull-Down, RxActive */
+	P8_41_hdmi_pin: pinmux_P8_41_hdmi_pin {
+		pinctrl-single,pins = <0x0b0  0x08>; };     /* lcd_data4.lcd_data4, OMAP_MUX_MODE0 | AM33XX_PIN_OUTPUT | AM33XX_PULL_DISA */
+
+	/* P8_42 (ZCZ ball T2 ) hdmi    */
+	P8_42_default_pin: pinmux_P8_42_default_pin {
+		pinctrl-single,pins = <0x0b4  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_42_gpio_pin: pinmux_P8_42_gpio_pin {
+		pinctrl-single,pins = <0x0b4  0x2F>; };     /* Mode 7, RxActive */
+	P8_42_gpio_pu_pin: pinmux_P8_42_gpio_pu_pin {
+		pinctrl-single,pins = <0x0b4  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P8_42_gpio_pd_pin: pinmux_P8_42_gpio_pd_pin {
+		pinctrl-single,pins = <0x0b4  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_42_pruout_pin: pinmux_P8_42_pruout_pin {
+		pinctrl-single,pins = <0x0b4  0x05>; };     /* Mode 5, Pull-Down*/
+	P8_42_pruin_pin: pinmux_P8_42_pruin_pin {
+		pinctrl-single,pins = <0x0b4  0x26>; };     /* Mode 6, Pull-Down, RxActive */
+	P8_42_hdmi_pin: pinmux_P8_42_hdmi_pin {
+		pinctrl-single,pins = <0x0b4  0x08>; };     /* lcd_data5.lcd_data5, OMAP_MUX_MODE0 | AM33XX_PIN_OUTPUT | AM33XX_PULL_DISA */
+
+	/* P8_43 (ZCZ ball R3 ) hdmi    */
+	P8_43_default_pin: pinmux_P8_43_default_pin {
+		pinctrl-single,pins = <0x0a8  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_43_gpio_pin: pinmux_P8_43_gpio_pin {
+		pinctrl-single,pins = <0x0a8  0x2F>; };     /* Mode 7, RxActive */
+	P8_43_gpio_pu_pin: pinmux_P8_43_gpio_pu_pin {
+		pinctrl-single,pins = <0x0a8  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P8_43_gpio_pd_pin: pinmux_P8_43_gpio_pd_pin {
+		pinctrl-single,pins = <0x0a8  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_43_pruout_pin: pinmux_P8_43_pruout_pin {
+		pinctrl-single,pins = <0x0a8  0x05>; };     /* Mode 5, Pull-Down*/
+	P8_43_pruin_pin: pinmux_P8_43_pruin_pin {
+		pinctrl-single,pins = <0x0a8  0x26>; };     /* Mode 6, Pull-Down, RxActive */
+	P8_43_pwm_pin: pinmux_P8_43_pwm_pin {
+		pinctrl-single,pins = <0x0a8  0x03>; };     /* Mode 3, Pull-Down  */
+	P8_43_hdmi_pin: pinmux_P8_43_hdmi_pin {
+		pinctrl-single,pins = <0x0a8  0x08>; };     /* lcd_data2.lcd_data2, OMAP_MUX_MODE0 | AM33XX_PIN_OUTPUT | AM33XX_PULL_DISA */
+
+	/* P8_44 (ZCZ ball R4 ) hdmi    */
+	P8_44_default_pin: pinmux_P8_44_default_pin {
+		pinctrl-single,pins = <0x0ac  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_44_gpio_pin: pinmux_P8_44_gpio_pin {
+		pinctrl-single,pins = <0x0ac  0x2F>; };     /* Mode 7, RxActive */
+	P8_44_gpio_pu_pin: pinmux_P8_44_gpio_pu_pin {
+		pinctrl-single,pins = <0x0ac  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P8_44_gpio_pd_pin: pinmux_P8_44_gpio_pd_pin {
+		pinctrl-single,pins = <0x0ac  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_44_pruout_pin: pinmux_P8_44_pruout_pin {
+		pinctrl-single,pins = <0x0ac  0x05>; };     /* Mode 5, Pull-Down*/
+	P8_44_pruin_pin: pinmux_P8_44_pruin_pin {
+		pinctrl-single,pins = <0x0ac  0x26>; };     /* Mode 6, Pull-Down, RxActive */
+	P8_44_pwm_pin: pinmux_P8_44_pwm_pin {
+		pinctrl-single,pins = <0x0ac  0x23>; };     /* Mode 3, Pull-Down, RxActive */
+	P8_44_hdmi_pin: pinmux_P8_44_hdmi_pin {
+		pinctrl-single,pins = <0x0ac  0x08>; };     /* lcd_data3.lcd_data3, OMAP_MUX_MODE0 | AM33XX_PIN_OUTPUT | AM33XX_PULL_DISA */
+
+	/* P8_45 (ZCZ ball R1 ) hdmi    */
+	P8_45_default_pin: pinmux_P8_45_default_pin {
+		pinctrl-single,pins = <0x0a0  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_45_gpio_pin: pinmux_P8_45_gpio_pin {
+		pinctrl-single,pins = <0x0a0  0x2F>; };     /* Mode 7, RxActive */
+	P8_45_gpio_pu_pin: pinmux_P8_45_gpio_pu_pin {
+		pinctrl-single,pins = <0x0a0  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P8_45_gpio_pd_pin: pinmux_P8_45_gpio_pd_pin {
+		pinctrl-single,pins = <0x0a0  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_45_pruout_pin: pinmux_P8_45_pruout_pin {
+		pinctrl-single,pins = <0x0a0  0x05>; };     /* Mode 5, Pull-Down*/
+	P8_45_pruin_pin: pinmux_P8_45_pruin_pin {
+		pinctrl-single,pins = <0x0a0  0x26>; };     /* Mode 6, Pull-Down, RxActive */
+	P8_45_pwm_pin: pinmux_P8_45_pwm_pin {
+		pinctrl-single,pins = <0x0a0  0x03>; };     /* Mode 3, Pull-Down*/
+	P8_45_hdmi_pin: pinmux_P8_45_hdmi_pin {
+		pinctrl-single,pins = <0x0a0  0x08>; };     /* lcd_data0.lcd_data0, OMAP_MUX_MODE0 | AM33XX_PIN_OUTPUT | AM33XX_PULL_DISA */
+
+	/* P8_46 (ZCZ ball R2 ) hdmi    */
+	P8_46_default_pin: pinmux_P8_46_default_pin {
+		pinctrl-single,pins = <0x0a4  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_46_gpio_pin: pinmux_P8_46_gpio_pin {
+		pinctrl-single,pins = <0x0a4  0x2F>; };     /* Mode 7, RxActive */
+	P8_46_gpio_pu_pin: pinmux_P8_46_gpio_pu_pin {
+		pinctrl-single,pins = <0x0a4  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P8_46_gpio_pd_pin: pinmux_P8_46_gpio_pd_pin {
+		pinctrl-single,pins = <0x0a4  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_46_pruout_pin: pinmux_P8_46_pruout_pin {
+		pinctrl-single,pins = <0x0a4  0x05>; };     /* Mode 5, Pull-Down*/
+	P8_46_pruin_pin: pinmux_P8_46_pruin_pin {
+		pinctrl-single,pins = <0x0a4  0x26>; };     /* Mode 6, Pull-Down, RxActive */
+	P8_46_pwm_pin: pinmux_P8_46_pwm_pin {
+		pinctrl-single,pins = <0x0a4  0x03>; };     /* Mode 3, Pull-Down*/
+	P8_46_hdmi_pin: pinmux_P8_46_hdmi_pin {
+		pinctrl-single,pins = <0x0a4  0x08>; };     /* lcd_data1.lcd_data1, OMAP_MUX_MODE0 | AM33XX_PIN_OUTPUT | AM33XX_PULL_DISA */
+
+	/************************/
+	/* P9 Header            */
+	/************************/
+
+	/* P9_01                GND     */
+	/* P9_02                GND     */
+	/* P9_03                3.3V    */
+	/* P9_04                3.3V    */
+	/* P9_05                VDD_5V  */
+	/* P9_06                VDD_5V  */
+	/* P9_07                SYS_5V  */
+	/* P9_08                SYS_5V  */
+	/* P9_09                PWR_BUT */
+	/* P9_10 (ZCZ ball A10) RESETn  */
+
+	/* P9_11 (ZCZ ball T17) */
+	P9_11_default_pin: pinmux_P9_11_default_pin {
+		pinctrl-single,pins = <0x070  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P9_11_gpio_pin: pinmux_P9_11_gpio_pin {
+		pinctrl-single,pins = <0x070  0x2F>; };     /* Mode 7, RxActive */
+	P9_11_gpio_pu_pin: pinmux_P9_11_gpio_pu_pin {
+		pinctrl-single,pins = <0x070  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P9_11_gpio_pd_pin: pinmux_P9_11_gpio_pd_pin {
+		pinctrl-single,pins = <0x070  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P9_11_uart_pin: pinmux_P9_11_uart_pin {
+		pinctrl-single,pins = <0x070  0x36>; };     /* Mode 6, Pull-Up, RxActive */
+
+	/* P9_12 (ZCZ ball U18) */
+	P9_12_default_pin: pinmux_P9_12_default_pin {
+		pinctrl-single,pins = <0x078  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P9_12_gpio_pin: pinmux_P9_12_gpio_pin {
+		pinctrl-single,pins = <0x078  0x2F>; };     /* Mode 7, RxActive */
+	P9_12_gpio_pu_pin: pinmux_P9_12_gpio_pu_pin {
+		pinctrl-single,pins = <0x078  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P9_12_gpio_pd_pin: pinmux_P9_12_gpio_pd_pin {
+		pinctrl-single,pins = <0x078  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+
+	/* P9_13 (ZCZ ball U17) */
+	P9_13_default_pin: pinmux_P9_13_default_pin {
+		pinctrl-single,pins = <0x074  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P9_13_gpio_pin: pinmux_P9_13_gpio_pin {
+		pinctrl-single,pins = <0x074  0x2F>; };     /* Mode 7, RxActive */
+	P9_13_gpio_pu_pin: pinmux_P9_13_gpio_pu_pin {
+		pinctrl-single,pins = <0x074  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P9_13_gpio_pd_pin: pinmux_P9_13_gpio_pd_pin {
+		pinctrl-single,pins = <0x074  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P9_13_uart_pin: pinmux_P9_13_uart_pin {
+		pinctrl-single,pins = <0x074  0x36>; };     /* Mode 6, Pull-Up, RxActive */
+
+	/* P9_14 (ZCZ ball U14) */
+	P9_14_default_pin: pinmux_P9_14_default_pin {
+		pinctrl-single,pins = <0x048  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P9_14_gpio_pin: pinmux_P9_14_gpio_pin {
+		pinctrl-single,pins = <0x048  0x2F>; };     /* Mode 7, RxActive */
+	P9_14_gpio_pu_pin: pinmux_P9_14_gpio_pu_pin {
+		pinctrl-single,pins = <0x048  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P9_14_gpio_pd_pin: pinmux_P9_14_gpio_pd_pin {
+		pinctrl-single,pins = <0x048  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P9_14_pwm_pin: pinmux_P9_14_pwm_pin {
+		pinctrl-single,pins = <0x048  0x26>; };     /* Mode 6, Pull-Down, RxActive */
+
+	/* P9_15 (ZCZ ball R13) */
+	P9_15_default_pin: pinmux_P9_15_default_pin {
+		pinctrl-single,pins = <0x040  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P9_15_gpio_pin: pinmux_P9_15_gpio_pin {
+		pinctrl-single,pins = <0x040  0x2F>; };     /* Mode 7, RxActive */
+	P9_15_gpio_pu_pin: pinmux_P9_15_gpio_pu_pin {
+		pinctrl-single,pins = <0x040  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P9_15_gpio_pd_pin: pinmux_P9_15_gpio_pd_pin {
+		pinctrl-single,pins = <0x040  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P9_15_pwm_pin: pinmux_P9_15_pwm_pin {
+		pinctrl-single,pins = <0x040  0x26>; };     /* Mode 6, Pull-Down, RxActive */
+
+	/* P9_16 (ZCZ ball T14) */
+	P9_16_default_pin: pinmux_P9_16_default_pin {
+		pinctrl-single,pins = <0x04c  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P9_16_gpio_pin: pinmux_P9_16_gpio_pin {
+		pinctrl-single,pins = <0x04c  0x2F>; };     /* Mode 7, RxActive */
+	P9_16_gpio_pu_pin: pinmux_P9_16_gpio_pu_pin {
+		pinctrl-single,pins = <0x04c  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P9_16_gpio_pd_pin: pinmux_P9_16_gpio_pd_pin {
+		pinctrl-single,pins = <0x04c  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P9_16_pwm_pin: pinmux_P9_16_pwm_pin {
+		pinctrl-single,pins = <0x04c  0x26>; };     /* Mode 6, Pull-Down, RxActive */
+
+	/* P9_17 (ZCZ ball A16) */
+	P9_17_default_pin: pinmux_P9_17_default_pin {
+		pinctrl-single,pins = <0x15c  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P9_17_gpio_pin: pinmux_P9_17_gpio_pin {
+		pinctrl-single,pins = <0x15c  0x2F>; };     /* Mode 7, RxActive */
+	P9_17_gpio_pu_pin: pinmux_P9_17_gpio_pu_pin {
+		pinctrl-single,pins = <0x15c  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P9_17_gpio_pd_pin: pinmux_P9_17_gpio_pd_pin {
+		pinctrl-single,pins = <0x15c  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P9_17_spi_pin: pinmux_P9_17_spi_pin {
+		pinctrl-single,pins = <0x15c  0x30>; };     /* Mode 0, Pull-Up, RxActive */
+	P9_17_i2c_pin: pinmux_P9_17_i2c_pin {
+		pinctrl-single,pins = <0x15c  0x32>; };     /* Mode 2, Pull-Up, RxActive */
+	P9_17_pwm_pin: pinmux_P9_17_pwm_pin {
+		pinctrl-single,pins = <0x15c  0x33>; };     /* Mode 3, Pull-Up, RxActive */
+
+	/* P9_18 (ZCZ ball B16) */
+	P9_18_default_pin: pinmux_P9_18_default_pin {
+		pinctrl-single,pins = <0x158  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P9_18_gpio_pin: pinmux_P9_18_gpio_pin {
+		pinctrl-single,pins = <0x158  0x2F>; };     /* Mode 7, RxActive */
+	P9_18_gpio_pu_pin: pinmux_P9_18_gpio_pu_pin {
+		pinctrl-single,pins = <0x158  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P9_18_gpio_pd_pin: pinmux_P9_18_gpio_pd_pin {
+		pinctrl-single,pins = <0x158  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P9_18_spi_pin: pinmux_P9_18_spi_pin {
+		pinctrl-single,pins = <0x158  0x30>; };     /* Mode 0, Pull-Up, RxActive */
+	P9_18_i2c_pin: pinmux_P9_18_i2c_pin {
+		pinctrl-single,pins = <0x158  0x32>; };     /* Mode 2, Pull-Up, RxActive */
+	P9_18_pwm_pin: pinmux_P9_18_pwm_pin {
+		pinctrl-single,pins = <0x158  0x33>; };     /* Mode 3, Pull-Up, RxActive */
+
+	/* P9_19 (ZCZ ball D17) */
+	P9_19_default_pin: pinmux_P9_19_default_pin {
+		pinctrl-single,pins = <0x17c  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P9_19_gpio_pin: pinmux_P9_19_gpio_pin {
+		pinctrl-single,pins = <0x17c  0x2F>; };     /* Mode 7, RxActive */
+	P9_19_gpio_pu_pin: pinmux_P9_19_gpio_pu_pin {
+		pinctrl-single,pins = <0x17c  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P9_19_gpio_pd_pin: pinmux_P9_19_gpio_pd_pin {
+		pinctrl-single,pins = <0x17c  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P9_19_can_pin: pinmux_P9_19_can_pin {
+		pinctrl-single,pins = <0x17c  0x32>; };     /* Mode 2, Pull-Up, RxActive */
+	P9_19_i2c_pin: pinmux_P9_19_i2c_pin {
+		pinctrl-single,pins = <0x17c  0x73>; };     /* (SLEWCTRL_SLOW | PIN_INPUT_PULLUP | MUX_MODE3) */
+
+	/* P9_20 (ZCZ ball D18) */
+	P9_20_default_pin: pinmux_P9_20_default_pin {
+		pinctrl-single,pins = <0x178  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P9_20_gpio_pin: pinmux_P9_20_gpio_pin {
+		pinctrl-single,pins = <0x178  0x2F>; };     /* Mode 7, RxActive */
+	P9_20_gpio_pu_pin: pinmux_P9_20_gpio_pu_pin {
+		pinctrl-single,pins = <0x178  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P9_20_gpio_pd_pin: pinmux_P9_20_gpio_pd_pin {
+		pinctrl-single,pins = <0x178  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P9_20_can_pin: pinmux_P9_20_can_pin {
+		pinctrl-single,pins = <0x178  0x12>; };     /* Mode 2, Pull-Up, RxActive */
+	P9_20_i2c_pin: pinmux_P9_20_i2c_pin {
+		pinctrl-single,pins = <0x178  0x73>; };     /* (SLEWCTRL_SLOW | PIN_INPUT_PULLUP | MUX_MODE3) */
+
+	/* P9_21 (ZCZ ball B17) */
+	P9_21_default_pin: pinmux_P9_21_default_pin {
+		pinctrl-single,pins = <0x154  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P9_21_gpio_pin: pinmux_P9_21_gpio_pin {
+		pinctrl-single,pins = <0x154  0x2F>; };     /* Mode 7, RxActive */
+	P9_21_gpio_pu_pin: pinmux_P9_21_gpio_pu_pin {
+		pinctrl-single,pins = <0x154  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P9_21_gpio_pd_pin: pinmux_P9_21_gpio_pd_pin {
+		pinctrl-single,pins = <0x154  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P9_21_spi_pin: pinmux_P9_21_spi_pin {
+		pinctrl-single,pins = <0x154  0x30>; };     /* Mode 0, Pull-Up, RxActive */
+	P9_21_uart_pin: pinmux_P9_21_uart_pin {
+		pinctrl-single,pins = <0x154  0x31>; };     /* Mode 1, Pull-Up, RxActive */
+	P9_21_i2c_pin: pinmux_P9_21_i2c_pin {
+		pinctrl-single,pins = <0x154  0x32>; };     /* Mode 2, Pull-Up, RxActive */
+	P9_21_pwm_pin: pinmux_P9_21_pwm_pin {
+		pinctrl-single,pins = <0x154  0x33>; };     /* Mode 3, Pull-Up, RxActive */
+
+	/* P9_22 (ZCZ ball A17) */
+	P9_22_default_pin: pinmux_P9_22_default_pin {
+		pinctrl-single,pins = <0x150  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P9_22_gpio_pin: pinmux_P9_22_gpio_pin {
+		pinctrl-single,pins = <0x150  0x2F>; };     /* Mode 7, RxActive */
+	P9_22_gpio_pu_pin: pinmux_P9_22_gpio_pu_pin {
+		pinctrl-single,pins = <0x150  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P9_22_gpio_pd_pin: pinmux_P9_22_gpio_pd_pin {
+		pinctrl-single,pins = <0x150  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P9_22_spi_pin: pinmux_P9_22_spi_pin {
+		pinctrl-single,pins = <0x150  0x30>; };     /* Mode 0, Pull-Up, RxActive */
+	P9_22_uart_pin: pinmux_P9_22_uart_pin {
+		pinctrl-single,pins = <0x150  0x31>; };     /* Mode 1, Pull-Up, RxActive */
+	P9_22_i2c_pin: pinmux_P9_22_i2c_pin {
+		pinctrl-single,pins = <0x150  0x32>; };     /* Mode 2, Pull-Up, RxActive */
+	P9_22_pwm_pin: pinmux_P9_22_pwm_pin {
+		pinctrl-single,pins = <0x150  0x33>; };     /* Mode 3, Pull-Up, RxActive */
+
+	/* P9_23 (ZCZ ball V14) */
+	P9_23_default_pin: pinmux_P9_23_default_pin {
+		pinctrl-single,pins = <0x044  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P9_23_gpio_pin: pinmux_P9_23_gpio_pin {
+		pinctrl-single,pins = <0x044  0x2F>; };     /* Mode 7, RxActive */
+	P9_23_gpio_pu_pin: pinmux_P9_23_gpio_pu_pin {
+		pinctrl-single,pins = <0x044  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P9_23_gpio_pd_pin: pinmux_P9_23_gpio_pd_pin {
+		pinctrl-single,pins = <0x044  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P9_23_pwm_pin: pinmux_P9_23_pwm_pin {
+		pinctrl-single,pins = <0x044  0x26>; };     /* Mode 6, Pull-Down, RxActive */
+
+	/* P9_24 (ZCZ ball D15) */
+	P9_24_default_pin: pinmux_P9_24_default_pin {
+		pinctrl-single,pins = <0x184  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P9_24_gpio_pin: pinmux_P9_24_gpio_pin {
+		pinctrl-single,pins = <0x184  0x2F>; };     /* Mode 7, RxActive */
+	P9_24_gpio_pu_pin: pinmux_P9_24_gpio_pu_pin {
+		pinctrl-single,pins = <0x184  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P9_24_gpio_pd_pin: pinmux_P9_24_gpio_pd_pin {
+		pinctrl-single,pins = <0x184  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P9_24_uart_pin: pinmux_P9_24_uart_pin {
+		pinctrl-single,pins = <0x184  0x30>; };     /* Mode 0, Pull-Up, RxActive */
+	P9_24_can_pin: pinmux_P9_24_can_pin {
+		pinctrl-single,pins = <0x184  0x32>; };     /* Mode 2, Pull-Up, RxActive */
+	P9_24_i2c_pin: pinmux_P9_24_i2c_pin {
+		pinctrl-single,pins = <0x184  0x33>; };     /* Mode 3, Pull-Up, RxActive */
+	P9_24_pruin_pin: pinmux_P9_24_pruin_pin {
+		pinctrl-single,pins = <0x184  0x36>; };     /* Mode 6, Pull-Up, RxActive */
+
+	/* P9_25 (ZCZ ball A14) Audio   */
+	P9_25_default_pin: pinmux_P9_25_default_pin {
+		pinctrl-single,pins = <0x1ac  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P9_25_gpio_pin: pinmux_P9_25_gpio_pin {
+		pinctrl-single,pins = <0x1ac  0x2F>; };     /* Mode 7, RxActive */
+	P9_25_gpio_pu_pin: pinmux_P9_25_gpio_pu_pin {
+		pinctrl-single,pins = <0x1ac  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P9_25_gpio_pd_pin: pinmux_P9_25_gpio_pd_pin {
+		pinctrl-single,pins = <0x1ac  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P9_25_qep_pin: pinmux_P9_25_qep_pin {
+		pinctrl-single,pins = <0x1ac  0x21>; };     /* Mode 1, Pull-Down, RxActive */
+	P9_25_pruout_pin: pinmux_P9_25_pruout_pin {
+		pinctrl-single,pins = <0x1ac  0x25>; };     /* Mode 5, Pull-Down, RxActive */
+	P9_25_pruin_pin: pinmux_P9_25_pruin_pin {
+		pinctrl-single,pins = <0x1ac  0x26>; };     /* Mode 6, Pull-Down, RxActive */
+	P9_25_audio_pin: pinmux_P9_25_audio_pin {
+		pinctrl-single,pins = <0x1ac  (PIN_INPUT_PULLUP | MUX_MODE0)>; };	/* mcasp0_ahclkx.mcasp0_ahclkx */
+
+	/* P9_26 (ZCZ ball D16) */
+	P9_26_default_pin: pinmux_P9_26_default_pin {
+		pinctrl-single,pins = <0x180  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P9_26_gpio_pin: pinmux_P9_26_gpio_pin {
+		pinctrl-single,pins = <0x180  0x2F>; };     /* Mode 7, RxActive */
+	P9_26_gpio_pu_pin: pinmux_P9_26_gpio_pu_pin {
+		pinctrl-single,pins = <0x180  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P9_26_gpio_pd_pin: pinmux_P9_26_gpio_pd_pin {
+		pinctrl-single,pins = <0x180  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P9_26_uart_pin: pinmux_P9_26_uart_pin {
+		pinctrl-single,pins = <0x180  0x30>; };     /* Mode 0, Pull-Up, RxActive */
+	P9_26_can_pin: pinmux_P9_26_can_pin {
+		pinctrl-single,pins = <0x180  0x12>; };     /* Mode 2, Pull-Up, RxActive */
+	P9_26_i2c_pin: pinmux_P9_26_i2c_pin {
+		pinctrl-single,pins = <0x180  0x33>; };     /* Mode 3, Pull-Up, RxActive */
+	P9_26_pruin_pin: pinmux_P9_26_pruin_pin {
+		pinctrl-single,pins = <0x180  0x36>; };     /* Mode 6, Pull-Up, RxActive */
+
+	/* P9_27 (ZCZ ball C13) */
+	P9_27_default_pin: pinmux_P9_27_default_pin {
+		pinctrl-single,pins = <0x1a4  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P9_27_gpio_pin: pinmux_P9_27_gpio_pin {
+		pinctrl-single,pins = <0x1a4  0x2F>; };     /* Mode 7, RxActive */
+	P9_27_gpio_pu_pin: pinmux_P9_27_gpio_pu_pin {
+		pinctrl-single,pins = <0x1a4  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P9_27_gpio_pd_pin: pinmux_P9_27_gpio_pd_pin {
+		pinctrl-single,pins = <0x1a4  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P9_27_qep_pin: pinmux_P9_27_qep_pin {
+		pinctrl-single,pins = <0x1a4  0x21>; };     /* Mode 1, Pull-Down, RxActive */
+	P9_27_pruout_pin: pinmux_P9_27_pruout_pin {
+		pinctrl-single,pins = <0x1a4  0x25>; };     /* Mode 5, Pull-Down, RxActive */
+	P9_27_pruin_pin: pinmux_P9_27_pruin_pin {
+		pinctrl-single,pins = <0x1a4  0x26>; };     /* Mode 6, Pull-Down, RxActive */
+
+	/* P9_28 (ZCZ ball C12) Audio   */
+	P9_28_default_pin: pinmux_P9_28_default_pin {
+		pinctrl-single,pins = <0x19c  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P9_28_gpio_pin: pinmux_P9_28_gpio_pin {
+		pinctrl-single,pins = <0x19c  0x2F>; };     /* Mode 7, RxActive */
+	P9_28_gpio_pu_pin: pinmux_P9_28_gpio_pu_pin {
+		pinctrl-single,pins = <0x19c  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P9_28_gpio_pd_pin: pinmux_P9_28_gpio_pd_pin {
+		pinctrl-single,pins = <0x19c  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P9_28_pwm_pin: pinmux_P9_28_pwm_pin {
+		pinctrl-single,pins = <0x19c  0x21>; };     /* Mode 1, Pull-Down, RxActive */
+	P9_28_spi_pin: pinmux_P9_28_spi_pin {
+		pinctrl-single,pins = <0x19c  0x23>; };     /* Mode 3, Pull-Down, RxActive */
+	P9_28_pwm2_pin: pinmux_P9_28_pwm2_pin {
+		pinctrl-single,pins = <0x19c  0x24>; };     /* Mode 4, Pull-Down, RxActive */
+	P9_28_pruout_pin: pinmux_P9_28_pruout_pin {
+		pinctrl-single,pins = <0x19c  0x25>; };     /* Mode 5, Pull-Down, RxActive */
+	P9_28_pruin_pin: pinmux_P9_28_pruin_pin {
+		pinctrl-single,pins = <0x19c  0x26>; };     /* Mode 6, Pull-Down, RxActive */
+	P9_28_audio_pin: pinmux_P9_28_audio_pin {
+		pinctrl-single,pins = <0x19c  (PIN_OUTPUT_PULLDOWN | MUX_MODE2)>; };	/* mcasp0_ahclkr.mcasp0_axr2 */
+
+	/* P9_29 (ZCZ ball B13) Audio   */
+	P9_29_default_pin: pinmux_P9_29_default_pin {
+		pinctrl-single,pins = <0x194  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P9_29_gpio_pin: pinmux_P9_29_gpio_pin {
+		pinctrl-single,pins = <0x194  0x2F>; };     /* Mode 7, RxActive */
+	P9_29_gpio_pu_pin: pinmux_P9_29_gpio_pu_pin {
+		pinctrl-single,pins = <0x194  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P9_29_gpio_pd_pin: pinmux_P9_29_gpio_pd_pin {
+		pinctrl-single,pins = <0x194  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P9_29_pwm_pin: pinmux_P9_29_pwm_pin {
+		pinctrl-single,pins = <0x194  0x21>; };     /* Mode 1, Pull-Down, RxActive */
+	P9_29_spi_pin: pinmux_P9_29_spi_pin {
+		pinctrl-single,pins = <0x194  0x23>; };     /* Mode 3, Pull-Down, RxActive */
+	P9_29_pruout_pin: pinmux_P9_29_pruout_pin {
+		pinctrl-single,pins = <0x194  0x25>; };     /* Mode 5, Pull-Down, RxActive */
+	P9_29_pruin_pin: pinmux_P9_29_pruin_pin {
+		pinctrl-single,pins = <0x194  0x26>; };     /* Mode 6, Pull-Down, RxActive */
+	P9_29_audio_pin: pinmux_P9_29_audio_pin {
+		pinctrl-single,pins = <0x194  (PIN_OUTPUT_PULLUP | MUX_MODE0)>; };	/* mcasp0_fsx.mcasp0_fsx */
+
+	/* P9_30 (ZCZ ball D12) */
+	P9_30_default_pin: pinmux_P9_30_default_pin {
+		pinctrl-single,pins = <0x198  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P9_30_gpio_pin: pinmux_P9_30_gpio_pin {
+		pinctrl-single,pins = <0x198  0x2F>; };     /* Mode 7, RxActive */
+	P9_30_gpio_pu_pin: pinmux_P9_30_gpio_pu_pin {
+		pinctrl-single,pins = <0x198  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P9_30_gpio_pd_pin: pinmux_P9_30_gpio_pd_pin {
+		pinctrl-single,pins = <0x198  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P9_30_pwm_pin: pinmux_P9_30_pwm_pin {
+		pinctrl-single,pins = <0x198  0x21>; };     /* Mode 1, Pull-Down, RxActive */
+	P9_30_spi_pin: pinmux_P9_30_spi_pin {
+		pinctrl-single,pins = <0x198  0x23>; };     /* Mode 3, Pull-Down, RxActive */
+	P9_30_pruout_pin: pinmux_P9_30_pruout_pin {
+		pinctrl-single,pins = <0x198  0x25>; };     /* Mode 5, Pull-Down, RxActive */
+	P9_30_pruin_pin: pinmux_P9_30_pruin_pin {
+		pinctrl-single,pins = <0x198  0x26>; };     /* Mode 6, Pull-Down, RxActive */
+
+	/* P9_31 (ZCZ ball A13) Audio   */
+	P9_31_default_pin: pinmux_P9_31_default_pin {
+		pinctrl-single,pins = <0x190  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P9_31_gpio_pin: pinmux_P9_31_gpio_pin {
+		pinctrl-single,pins = <0x190  0x2F>; };     /* Mode 7, RxActive */
+	P9_31_gpio_pu_pin: pinmux_P9_31_gpio_pu_pin {
+		pinctrl-single,pins = <0x190  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P9_31_gpio_pd_pin: pinmux_P9_31_gpio_pd_pin {
+		pinctrl-single,pins = <0x190  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P9_31_pwm_pin: pinmux_P9_31_pwm_pin {
+		pinctrl-single,pins = <0x190  0x21>; };     /* Mode 1, Pull-Down, RxActive */
+	P9_31_spi_pin: pinmux_P9_31_spi_pin {
+		pinctrl-single,pins = <0x190  0x23>; };     /* Mode 3, Pull-Down, RxActive */
+	P9_31_pruout_pin: pinmux_P9_31_pruout_pin {
+		pinctrl-single,pins = <0x190  0x25>; };     /* Mode 5, Pull-Down, RxActive */
+	P9_31_pruin_pin: pinmux_P9_31_pruin_pin {
+		pinctrl-single,pins = <0x190  0x26>; };     /* Mode 6, Pull-Down, RxActive */
+	P9_31_audio_pin: pinmux_P9_31_audio_pin {
+		pinctrl-single,pins = <0x190  (PIN_OUTPUT_PULLDOWN | MUX_MODE0)>; };	/* mcasp0_aclkx.mcasp0_aclkx */
+
+	/* P9_32                VADC    */
+	/* P9_33 (ZCZ ball C8 ) AIN4    */
+	/* P9_34                AGND    */
+	/* P9_35 (ZCZ ball A8 ) AIN6    */
+	/* P9_36 (ZCZ ball B8 ) AIN5    */
+	/* P9_37 (ZCZ ball B7 ) AIN2    */
+	/* P9_38 (ZCZ ball A7 ) AIN3    */
+	/* P9_39 (ZCZ ball B6 ) AIN0    */
+	/* P9_40 (ZCZ ball C7 ) AIN1    */
+
+	/* P9_41 (ZCZ ball D14) */
+	P9_41_default_pin: pinmux_P9_41_default_pin {
+		pinctrl-single,pins = <0x1b4  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P9_41_gpio_pin: pinmux_P9_41_gpio_pin {
+		pinctrl-single,pins = <0x1b4  0x2F>; };     /* Mode 7, RxActive */
+	P9_41_gpio_pu_pin: pinmux_P9_41_gpio_pu_pin {
+		pinctrl-single,pins = <0x1b4  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P9_41_gpio_pd_pin: pinmux_P9_41_gpio_pd_pin {
+		pinctrl-single,pins = <0x1b4  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P9_41_timer_pin: pinmux_P9_41_timer_pin {
+		pinctrl-single,pins = <0x1b4  0x24>; };     /* Mode 4, Pull-Down, RxActive */
+	P9_41_pruin_pin: pinmux_P9_41_pruin_pin {
+		pinctrl-single,pins = <0x1b4  0x25>; };     /* Mode 5, Pull-Down, RxActive */
+
+	/* P9_41.1              */
+	/* P9_91 (ZCZ ball D13) */
+	P9_91_default_pin: pinmux_P9_91_default_pin {
+		pinctrl-single,pins = <0x1a8  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P9_91_gpio_pin: pinmux_P9_91_gpio_pin {
+		pinctrl-single,pins = <0x1a8  0x2F>; };     /* Mode 7, RxActive */
+	P9_91_gpio_pu_pin: pinmux_P9_91_gpio_pu_pin {
+		pinctrl-single,pins = <0x1a8  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P9_91_gpio_pd_pin: pinmux_P9_91_gpio_pd_pin {
+		pinctrl-single,pins = <0x1a8  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P9_91_qep_pin: pinmux_P9_91_qep_pin {
+		pinctrl-single,pins = <0x1a8  0x21>; };     /* Mode 1, Pull-Down, RxActive */
+	P9_91_pruout_pin: pinmux_P9_91_pruout_pin {
+		pinctrl-single,pins = <0x1a8  0x25>; };     /* Mode 5, Pull-Down, RxActive */
+	P9_91_pruin_pin: pinmux_P9_91_pruin_pin {
+		pinctrl-single,pins = <0x1a8  0x26>; };     /* Mode 6, Pull-Down, RxActive */
+
+	/* P9_42 (ZCZ ball C18) */
+	P9_42_default_pin: pinmux_P9_42_default_pin {
+		pinctrl-single,pins = <0x164  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P9_42_gpio_pin: pinmux_P9_42_gpio_pin {
+		pinctrl-single,pins = <0x164  0x2F>; };     /* Mode 7, RxActive */
+	P9_42_gpio_pu_pin: pinmux_P9_42_gpio_pu_pin {
+		pinctrl-single,pins = <0x164  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P9_42_gpio_pd_pin: pinmux_P9_42_gpio_pd_pin {
+		pinctrl-single,pins = <0x164  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P9_42_pwm_pin: pinmux_P9_42_pwm_pin {
+		pinctrl-single,pins = <0x164  0x20>; };     /* Mode 0, Pull-Down, RxActive */
+	P9_42_uart_pin: pinmux_P9_42_uart_pin {
+		pinctrl-single,pins = <0x164  0x21>; };     /* Mode 1, Pull-Down, RxActive */
+	P9_42_spics_pin: pinmux_P9_42_spics_pin {
+		pinctrl-single,pins = <0x164  0x22>; };     /* Mode 2, Pull-Down, RxActive */
+	P9_42_spiclk_pin: pinmux_P9_42_spiclk_pin {
+		pinctrl-single,pins = <0x164  0x24>; };     /* Mode 4, Pull-Down, RxActive */
+
+	/* P9_42.1              */
+	/* P9_92 (ZCZ ball B12) */
+	P9_92_default_pin: pinmux_P9_92_default_pin {
+		pinctrl-single,pins = <0x1a0  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P9_92_gpio_pin: pinmux_P9_92_gpio_pin {
+		pinctrl-single,pins = <0x1a0  0x2F>; };     /* Mode 7, RxActive */
+	P9_92_gpio_pu_pin: pinmux_P9_92_gpio_pu_pin {
+		pinctrl-single,pins = <0x1a0  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P9_92_gpio_pd_pin: pinmux_P9_92_gpio_pd_pin {
+		pinctrl-single,pins = <0x1a0  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P9_92_qep_pin: pinmux_P9_92_qep_pin {
+		pinctrl-single,pins = <0x1a0  0x21>; };     /* Mode 1, Pull-Down, RxActive */
+	P9_92_pruout_pin: pinmux_P9_92_pruout_pin {
+		pinctrl-single,pins = <0x1a0  0x25>; };     /* Mode 5, Pull-Down, RxActive */
+	P9_92_pruin_pin: pinmux_P9_92_pruin_pin {
+		pinctrl-single,pins = <0x1a0  0x26>; };     /* Mode 6, Pull-Down, RxActive */
+
+	/* P9_43                GND     */
+	/* P9_44                GND     */
+	/* P9_45                GND     */
+	/* P9_46                GND     */
+};
diff --git b/arch/arm/boot/dts/am335x-bone-common-universal.dtsi b/arch/arm/boot/dts/am335x-bone-common-universal.dtsi
new file mode 100644
index 0000000..781e33f
--- /dev/null
+++ b/arch/arm/boot/dts/am335x-bone-common-universal.dtsi
@@ -0,0 +1,2052 @@
+/*
+ * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+&am33xx_pinmux {
+	/************************/
+	/* P8 Header            */
+	/************************/
+
+	/* P8_01                GND     */
+	/* P8_02                GND     */
+	/* P8_03 (ZCZ ball R9 ) emmc    */
+	/* P8_04 (ZCZ ball T9 ) emmc    */
+	/* P8_05 (ZCZ ball R8 ) emmc    */
+	/* P8_06 (ZCZ ball T8 ) emmc    */
+
+	/* P8_07 (ZCZ ball R7 ) */
+	P8_07_default_pin: pinmux_P8_07_default_pin {
+		pinctrl-single,pins = <0x090  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P8_07_gpio_pin: pinmux_P8_07_gpio_pin {
+		pinctrl-single,pins = <0x090  0x2F>; };     /* Mode 7, RxActive */
+	P8_07_gpio_pu_pin: pinmux_P8_07_gpio_pu_pin {
+		pinctrl-single,pins = <0x090  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P8_07_gpio_pd_pin: pinmux_P8_07_gpio_pd_pin {
+		pinctrl-single,pins = <0x090  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_07_timer_pin: pinmux_P8_07_timer_pin {
+		pinctrl-single,pins = <0x090  0x32>; };     /* Mode 2, Pull-Up, RxActive */
+
+	/* P8_08 (ZCZ ball T7 ) */
+	P8_08_default_pin: pinmux_P8_08_default_pin {
+		pinctrl-single,pins = <0x094  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P8_08_gpio_pin: pinmux_P8_08_gpio_pin {
+		pinctrl-single,pins = <0x094  0x2F>; };     /* Mode 7, RxActive */
+	P8_08_gpio_pu_pin: pinmux_P8_08_gpio_pu_pin {
+		pinctrl-single,pins = <0x094  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P8_08_gpio_pd_pin: pinmux_P8_08_gpio_pd_pin {
+		pinctrl-single,pins = <0x094  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_08_timer_pin: pinmux_P8_08_timer_pin {
+		pinctrl-single,pins = <0x094  0x32>; };     /* Mode 2, Pull-Up, RxActive */
+
+	/* P8_09 (ZCZ ball T6 ) */
+	P8_09_default_pin: pinmux_P8_09_default_pin {
+		pinctrl-single,pins = <0x09c  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P8_09_gpio_pin: pinmux_P8_09_gpio_pin {
+		pinctrl-single,pins = <0x09c  0x2F>; };     /* Mode 7, RxActive */
+	P8_09_gpio_pu_pin: pinmux_P8_09_gpio_pu_pin {
+		pinctrl-single,pins = <0x09c  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P8_09_gpio_pd_pin: pinmux_P8_09_gpio_pd_pin {
+		pinctrl-single,pins = <0x09c  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_09_timer_pin: pinmux_P8_09_timer_pin {
+		pinctrl-single,pins = <0x09c  0x32>; };     /* Mode 2, Pull-Up, RxActive */
+
+	/* P8_10 (ZCZ ball U6 ) */
+	P8_10_default_pin: pinmux_P8_10_default_pin {
+		pinctrl-single,pins = <0x098  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P8_10_gpio_pin: pinmux_P8_10_gpio_pin {
+		pinctrl-single,pins = <0x098  0x2F>; };     /* Mode 7, RxActive */
+	P8_10_gpio_pu_pin: pinmux_P8_10_gpio_pu_pin {
+		pinctrl-single,pins = <0x098  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P8_10_gpio_pd_pin: pinmux_P8_10_gpio_pd_pin {
+		pinctrl-single,pins = <0x098  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_10_timer_pin: pinmux_P8_10_timer_pin {
+		pinctrl-single,pins = <0x098  0x32>; };     /* Mode 2, Pull-Up, RxActive */
+
+	/* P8_11 (ZCZ ball R12) */
+	P8_11_default_pin: pinmux_P8_11_default_pin {
+		pinctrl-single,pins = <0x034  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_11_gpio_pin: pinmux_P8_11_gpio_pin {
+		pinctrl-single,pins = <0x034  0x2F>; };     /* Mode 7, RxActive */
+	P8_11_gpio_pu_pin: pinmux_P8_11_gpio_pu_pin {
+		pinctrl-single,pins = <0x034  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P8_11_gpio_pd_pin: pinmux_P8_11_gpio_pd_pin {
+		pinctrl-single,pins = <0x034  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_11_pruout_pin: pinmux_P8_11_pruout_pin {
+		pinctrl-single,pins = <0x034  0x26>; };     /* Mode 6, Pull-Down, RxActive */
+	P8_11_qep_pin: pinmux_P8_11_qep_pin {
+		pinctrl-single,pins = <0x034  0x24>; };     /* Mode 4, Pull-Down, RxActive */
+
+	/* P8_12 (ZCZ ball T12) */
+	P8_12_default_pin: pinmux_P8_12_default_pin {
+		pinctrl-single,pins = <0x030  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_12_gpio_pin: pinmux_P8_12_gpio_pin {
+		pinctrl-single,pins = <0x030  0x2F>; };     /* Mode 7, RxActive */
+	P8_12_gpio_pu_pin: pinmux_P8_12_gpio_pu_pin {
+		pinctrl-single,pins = <0x030  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P8_12_gpio_pd_pin: pinmux_P8_12_gpio_pd_pin {
+		pinctrl-single,pins = <0x030  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_12_pruout_pin: pinmux_P8_12_pruout_pin {
+		pinctrl-single,pins = <0x030  0x26>; };     /* Mode 6, Pull-Down, RxActive */
+	P8_12_qep_pin: pinmux_P8_12_qep_pin {
+		pinctrl-single,pins = <0x030  0x24>; };     /* Mode 4, Pull-Down, RxActive */
+
+	/* P8_13 (ZCZ ball T10) */
+	P8_13_default_pin: pinmux_P8_13_default_pin {
+		pinctrl-single,pins = <0x024  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_13_gpio_pin: pinmux_P8_13_gpio_pin {
+		pinctrl-single,pins = <0x024  0x2F>; };     /* Mode 7, RxActive */
+	P8_13_gpio_pu_pin: pinmux_P8_13_gpio_pu_pin {
+		pinctrl-single,pins = <0x024  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P8_13_gpio_pd_pin: pinmux_P8_13_gpio_pd_pin {
+		pinctrl-single,pins = <0x024  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_13_pwm_pin: pinmux_P8_13_pwm_pin {
+		pinctrl-single,pins = <0x024  0x24>; };     /* Mode 4, Pull-Down, RxActive */
+
+	/* P8_14 (ZCZ ball T11) */
+	P8_14_default_pin: pinmux_P8_14_default_pin {
+		pinctrl-single,pins = <0x028  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_14_gpio_pin: pinmux_P8_14_gpio_pin {
+		pinctrl-single,pins = <0x028  0x2F>; };     /* Mode 7, RxActive */
+	P8_14_gpio_pu_pin: pinmux_P8_14_gpio_pu_pin {
+		pinctrl-single,pins = <0x028  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P8_14_gpio_pd_pin: pinmux_P8_14_gpio_pd_pin {
+		pinctrl-single,pins = <0x028  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_14_pwm_pin: pinmux_P8_14_pwm_pin {
+		pinctrl-single,pins = <0x028  0x24>; };     /* Mode 4, Pull-Down, RxActive */
+
+	/* P8_15 (ZCZ ball U13) */
+	P8_15_default_pin: pinmux_P8_15_default_pin {
+		pinctrl-single,pins = <0x03c  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_15_gpio_pin: pinmux_P8_15_gpio_pin {
+		pinctrl-single,pins = <0x03c  0x2F>; };     /* Mode 7, RxActive */
+	P8_15_gpio_pu_pin: pinmux_P8_15_gpio_pu_pin {
+		pinctrl-single,pins = <0x03c  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P8_15_gpio_pd_pin: pinmux_P8_15_gpio_pd_pin {
+		pinctrl-single,pins = <0x03c  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_15_pruin_pin: pinmux_P8_15_pruin_pin {
+		pinctrl-single,pins = <0x03c  0x26>; };     /* Mode 6, Pull-Down, RxActive */
+	P8_15_qep_pin: pinmux_P8_15_qep_pin {
+		pinctrl-single,pins = <0x03c  0x24>; };     /* Mode 4, Pull-Down, RxActive */
+
+	/* P8_16 (ZCZ ball V13) */
+	P8_16_default_pin: pinmux_P8_16_default_pin {
+		pinctrl-single,pins = <0x038  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_16_gpio_pin: pinmux_P8_16_gpio_pin {
+		pinctrl-single,pins = <0x038  0x2F>; };     /* Mode 7, RxActive */
+	P8_16_gpio_pu_pin: pinmux_P8_16_gpio_pu_pin {
+		pinctrl-single,pins = <0x038  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P8_16_gpio_pd_pin: pinmux_P8_16_gpio_pd_pin {
+		pinctrl-single,pins = <0x038  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_16_pruin_pin: pinmux_P8_16_pruin_pin {
+		pinctrl-single,pins = <0x038  0x26>; };     /* Mode 6, Pull-Down, RxActive */
+	P8_16_qep_pin: pinmux_P8_16_qep_pin {
+		pinctrl-single,pins = <0x038  0x24>; };     /* Mode 4, Pull-Down, RxActive */
+
+	/* P8_17 (ZCZ ball U12) */
+	P8_17_default_pin: pinmux_P8_17_default_pin {
+		pinctrl-single,pins = <0x02c  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_17_gpio_pin: pinmux_P8_17_gpio_pin {
+		pinctrl-single,pins = <0x02c  0x2F>; };     /* Mode 7, RxActive */
+	P8_17_gpio_pu_pin: pinmux_P8_17_gpio_pu_pin {
+		pinctrl-single,pins = <0x02c  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P8_17_gpio_pd_pin: pinmux_P8_17_gpio_pd_pin {
+		pinctrl-single,pins = <0x02c  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_17_pwm_pin: pinmux_P8_17_pwm_pin {
+		pinctrl-single,pins = <0x02c  0x24>; };     /* Mode 4, Pull-Down, RxActive */
+
+	/* P8_18 (ZCZ ball V12) */
+	P8_18_default_pin: pinmux_P8_18_default_pin {
+		pinctrl-single,pins = <0x08c  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_18_gpio_pin: pinmux_P8_18_gpio_pin {
+		pinctrl-single,pins = <0x08c  0x2F>; };     /* Mode 7, RxActive */
+	P8_18_gpio_pu_pin: pinmux_P8_18_gpio_pu_pin {
+		pinctrl-single,pins = <0x08c  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P8_18_gpio_pd_pin: pinmux_P8_18_gpio_pd_pin {
+		pinctrl-single,pins = <0x08c  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+
+	/* P8_19 (ZCZ ball U10) */
+	P8_19_default_pin: pinmux_P8_19_default_pin {
+		pinctrl-single,pins = <0x020  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_19_gpio_pin: pinmux_P8_19_gpio_pin {
+		pinctrl-single,pins = <0x020  0x2F>; };     /* Mode 7, RxActive */
+	P8_19_gpio_pu_pin: pinmux_P8_19_gpio_pu_pin {
+		pinctrl-single,pins = <0x020  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P8_19_gpio_pd_pin: pinmux_P8_19_gpio_pd_pin {
+		pinctrl-single,pins = <0x020  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_19_pwm_pin: pinmux_P8_19_pwm_pin {
+		pinctrl-single,pins = <0x020  0x24>; };     /* Mode 4, Pull-Down, RxActive */
+
+	/* P8_20 (ZCZ ball V9 ) emmc    */
+	/* P8_21 (ZCZ ball U9 ) emmc    */
+	/* P8_22 (ZCZ ball V8 ) emmc    */
+	/* P8_23 (ZCZ ball U8 ) emmc    */
+	/* P8_24 (ZCZ ball V7 ) emmc    */
+	/* P8_25 (ZCZ ball U7 ) emmc    */
+
+	/* P8_26 (ZCZ ball V6 ) */
+	P8_26_default_pin: pinmux_P8_26_default_pin {
+		pinctrl-single,pins = <0x07c  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P8_26_gpio_pin: pinmux_P8_26_gpio_pin {
+		pinctrl-single,pins = <0x07c  0x2F>; };     /* Mode 7, RxActive */
+	P8_26_gpio_pu_pin: pinmux_P8_26_gpio_pu_pin {
+		pinctrl-single,pins = <0x07c  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P8_26_gpio_pd_pin: pinmux_P8_26_gpio_pd_pin {
+		pinctrl-single,pins = <0x07c  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+
+	/* P8_27 (ZCZ ball U5 ) hdmi    */
+	P8_27_default_pin: pinmux_P8_27_default_pin {
+		pinctrl-single,pins = <0x0e0  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_27_gpio_pin: pinmux_P8_27_gpio_pin {
+		pinctrl-single,pins = <0x0e0  0x2F>; };     /* Mode 7, RxActive */
+	P8_27_gpio_pu_pin: pinmux_P8_27_gpio_pu_pin {
+		pinctrl-single,pins = <0x0e0  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P8_27_gpio_pd_pin: pinmux_P8_27_gpio_pd_pin {
+		pinctrl-single,pins = <0x0e0  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_27_pruout_pin: pinmux_P8_27_pruout_pin {
+		pinctrl-single,pins = <0x0e0  0x05>; };     /* Mode 5, Pull-Down*/
+	P8_27_pruin_pin: pinmux_P8_27_pruin_pin {
+		pinctrl-single,pins = <0x0e0  0x26>; };     /* Mode 6, Pull-Down, RxActive */
+	P8_27_hdmi_pin: pinmux_P8_27_hdmi_pin {
+		pinctrl-single,pins = <0x0e0  0x00>; };     /* lcd_vsync.lcd_vsync, OMAP_MUX_MODE0 | AM33XX_PIN_OUTPUT */
+
+	/* P8_28 (ZCZ ball V5 ) hdmi    */
+	P8_28_default_pin: pinmux_P8_28_default_pin {
+		pinctrl-single,pins = <0x0e8  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_28_gpio_pin: pinmux_P8_28_gpio_pin {
+		pinctrl-single,pins = <0x0e8  0x2F>; };     /* Mode 7, RxActive */
+	P8_28_gpio_pu_pin: pinmux_P8_28_gpio_pu_pin {
+		pinctrl-single,pins = <0x0e8  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P8_28_gpio_pd_pin: pinmux_P8_28_gpio_pd_pin {
+		pinctrl-single,pins = <0x0e8  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_28_pruout_pin: pinmux_P8_28_pruout_pin {
+		pinctrl-single,pins = <0x0e8  0x05>; };     /* Mode 5, Pull-Down */
+	P8_28_pruin_pin: pinmux_P8_28_pruin_pin {
+		pinctrl-single,pins = <0x0e8  0x26>; };     /* Mode 6, Pull-Down, RxActive */
+	P8_28_hdmi_pin: pinmux_P8_28_hdmi_pin {
+		pinctrl-single,pins = <0x0e8  0x00>; };     /* lcd_pclk.lcd_pclk, OMAP_MUX_MODE0 | AM33XX_PIN_OUTPUT */
+
+	/* P8_29 (ZCZ ball R5 ) hdmi    */
+	P8_29_default_pin: pinmux_P8_29_default_pin {
+		pinctrl-single,pins = <0x0e4  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_29_gpio_pin: pinmux_P8_29_gpio_pin {
+		pinctrl-single,pins = <0x0e4  0x2F>; };     /* Mode 7, RxActive */
+	P8_29_gpio_pu_pin: pinmux_P8_29_gpio_pu_pin {
+		pinctrl-single,pins = <0x0e4  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P8_29_gpio_pd_pin: pinmux_P8_29_gpio_pd_pin {
+		pinctrl-single,pins = <0x0e4  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_29_pruout_pin: pinmux_P8_29_pruout_pin {
+		pinctrl-single,pins = <0x0e4  0x05>; };     /* Mode 5, Pull-Down*/
+	P8_29_pruin_pin: pinmux_P8_29_pruin_pin {
+		pinctrl-single,pins = <0x0e4  0x26>; };     /* Mode 6, Pull-Down, RxActive */
+	P8_29_hdmi_pin: pinmux_P8_29_hdmi_pin {
+		pinctrl-single,pins = <0x0e4  0x00>; };     /* lcd_hsync.lcd_hsync, OMAP_MUX_MODE0 | AM33XX_PIN_OUTPUT */
+
+	/* P8_30 (ZCZ ball R6 ) hdmi    */
+	P8_30_default_pin: pinmux_P8_30_default_pin {
+		pinctrl-single,pins = <0x0ec  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_30_gpio_pin: pinmux_P8_30_gpio_pin {
+		pinctrl-single,pins = <0x0ec  0x2F>; };     /* Mode 7, RxActive */
+	P8_30_gpio_pu_pin: pinmux_P8_30_gpio_pu_pin {
+		pinctrl-single,pins = <0x0ec  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P8_30_gpio_pd_pin: pinmux_P8_30_gpio_pd_pin {
+		pinctrl-single,pins = <0x0ec  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_30_pruout_pin: pinmux_P8_30_pruout_pin {
+		pinctrl-single,pins = <0x0ec  0x05>; };     /* Mode 5, Pull-Down*/
+	P8_30_pruin_pin: pinmux_P8_30_pruin_pin {
+		pinctrl-single,pins = <0x0ec  0x26>; };     /* Mode 6, Pull-Down, RxActive */
+	P8_30_hdmi_pin: pinmux_P8_30_hdmi_pin {
+		pinctrl-single,pins = <0x0ec  0x00>; };     /* lcd_ac_bias_en.lcd_ac_bias_en, OMAP_MUX_MODE0 | AM33XX_PIN_OUTPUT */
+
+	/* P8_31 (ZCZ ball V4 ) hdmi    */
+	P8_31_default_pin: pinmux_P8_31_default_pin {
+		pinctrl-single,pins = <0x0d8  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_31_gpio_pin: pinmux_P8_31_gpio_pin {
+		pinctrl-single,pins = <0x0d8  0x2F>; };     /* Mode 7, RxActive */
+	P8_31_gpio_pu_pin: pinmux_P8_31_gpio_pu_pin {
+		pinctrl-single,pins = <0x0d8  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P8_31_gpio_pd_pin: pinmux_P8_31_gpio_pd_pin {
+		pinctrl-single,pins = <0x0d8  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_31_uart_pin: pinmux_P8_31_uart_pin {
+		pinctrl-single,pins = <0x0d8  0x24>; };     /* Mode 4, Pull-Down, RxActive */
+	P8_31_hdmi_pin: pinmux_P8_31_hdmi_pin {
+		pinctrl-single,pins = <0x0d8  0x08>; };     /* lcd_data14.lcd_data14, OMAP_MUX_MODE0 | AM33XX_PIN_OUTPUT | AM33XX_PULL_DISA */
+
+	/* P8_32 (ZCZ ball T5 ) hdmi    */
+	P8_32_default_pin: pinmux_P8_32_default_pin {
+		pinctrl-single,pins = <0x0dc  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_32_gpio_pin: pinmux_P8_32_gpio_pin {
+		pinctrl-single,pins = <0x0dc  0x2F>; };     /* Mode 7, RxActive */
+	P8_32_gpio_pu_pin: pinmux_P8_32_gpio_pu_pin {
+		pinctrl-single,pins = <0x0dc  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P8_32_gpio_pd_pin: pinmux_P8_32_gpio_pd_pin {
+		pinctrl-single,pins = <0x0dc  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_32_uart_pin: pinmux_P8_32_uart_pin {
+		pinctrl-single,pins = <0x0dc  0x26>; };     /* Mode 6, Pull-Down, RxActive */
+	P8_32_hdmi_pin: pinmux_P8_32_hdmi_pin {
+		pinctrl-single,pins = <0x0dc  0x08>; };     /* lcd_data15.lcd_data15, OMAP_MUX_MODE0 | AM33XX_PIN_OUTPUT | AM33XX_PULL_DISA */
+
+	/* P8_33 (ZCZ ball V3 ) hdmi    */
+	P8_33_default_pin: pinmux_P8_33_default_pin {
+		pinctrl-single,pins = <0x0d4  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_33_gpio_pin: pinmux_P8_33_gpio_pin {
+		pinctrl-single,pins = <0x0d4  0x2F>; };     /* Mode 7, RxActive */
+	P8_33_gpio_pu_pin: pinmux_P8_33_gpio_pu_pin {
+		pinctrl-single,pins = <0x0d4  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P8_33_gpio_pd_pin: pinmux_P8_33_gpio_pd_pin {
+		pinctrl-single,pins = <0x0d4  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_33_hdmi_pin: pinmux_P8_33_hdmi_pin {
+		pinctrl-single,pins = <0x0d4  0x08>; };     /* lcd_data13.lcd_data13, OMAP_MUX_MODE0 | AM33XX_PIN_OUTPUT | AM33XX_PULL_DISA */
+
+	/* P8_34 (ZCZ ball U4 ) hdmi    */
+	P8_34_default_pin: pinmux_P8_34_default_pin {
+		pinctrl-single,pins = <0x0cc  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_34_gpio_pin: pinmux_P8_34_gpio_pin {
+		pinctrl-single,pins = <0x0cc  0x2F>; };     /* Mode 7, RxActive */
+	P8_34_gpio_pu_pin: pinmux_P8_34_gpio_pu_pin {
+		pinctrl-single,pins = <0x0cc  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P8_34_gpio_pd_pin: pinmux_P8_34_gpio_pd_pin {
+		pinctrl-single,pins = <0x0cc  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_34_pwm_pin: pinmux_P8_34_pwm_pin {
+		pinctrl-single,pins = <0x0cc  0x22>; };     /* Mode 2, Pull-Down, RxActive */
+	P8_34_hdmi_pin: pinmux_P8_34_hdmi_pin {
+		pinctrl-single,pins = <0x0cc  0x08>; };     /* lcd_data11.lcd_data11, OMAP_MUX_MODE0 | AM33XX_PIN_OUTPUT | AM33XX_PULL_DISA */
+
+	/* P8_35 (ZCZ ball V2 ) hdmi    */
+	P8_35_default_pin: pinmux_P8_35_default_pin {
+		pinctrl-single,pins = <0x0d0  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_35_gpio_pin: pinmux_P8_35_gpio_pin {
+		pinctrl-single,pins = <0x0d0  0x2F>; };     /* Mode 7, RxActive */
+	P8_35_gpio_pu_pin: pinmux_P8_35_gpio_pu_pin {
+		pinctrl-single,pins = <0x0d0  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P8_35_gpio_pd_pin: pinmux_P8_35_gpio_pd_pin {
+		pinctrl-single,pins = <0x0d0  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_35_hdmi_pin: pinmux_P8_35_hdmi_pin {
+		pinctrl-single,pins = <0x0d0  0x08>; };     /* lcd_data12.lcd_data12, OMAP_MUX_MODE0 | AM33XX_PIN_OUTPUT | AM33XX_PULL_DISA */
+
+	/* P8_36 (ZCZ ball U3 ) hdmi    */
+	P8_36_default_pin: pinmux_P8_36_default_pin {
+		pinctrl-single,pins = <0x0c8  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_36_gpio_pin: pinmux_P8_36_gpio_pin {
+		pinctrl-single,pins = <0x0c8  0x2F>; };     /* Mode 7, RxActive */
+	P8_36_gpio_pu_pin: pinmux_P8_36_gpio_pu_pin {
+		pinctrl-single,pins = <0x0c8  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P8_36_gpio_pd_pin: pinmux_P8_36_gpio_pd_pin {
+		pinctrl-single,pins = <0x0c8  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_36_pwm_pin: pinmux_P8_36_pwm_pin {
+		pinctrl-single,pins = <0x0c8  0x22>; };     /* Mode 2, Pull-Down, RxActive */
+	P8_36_hdmi_pin: pinmux_P8_36_hdmi_pin {
+		pinctrl-single,pins = <0x0c8  0x08>; };     /* lcd_data10.lcd_data10, OMAP_MUX_MODE0 | AM33XX_PIN_OUTPUT | AM33XX_PULL_DISA */
+
+	/* P8_37 (ZCZ ball U1 ) hdmi    */
+	P8_37_default_pin: pinmux_P8_37_default_pin {
+		pinctrl-single,pins = <0x0c0  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_37_gpio_pin: pinmux_P8_37_gpio_pin {
+		pinctrl-single,pins = <0x0c0  0x2F>; };     /* Mode 7, RxActive */
+	P8_37_gpio_pu_pin: pinmux_P8_37_gpio_pu_pin {
+		pinctrl-single,pins = <0x0c0  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P8_37_gpio_pd_pin: pinmux_P8_37_gpio_pd_pin {
+		pinctrl-single,pins = <0x0c0  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_37_uart_pin: pinmux_P8_37_uart_pin {
+		pinctrl-single,pins = <0x0c0  0x04>; };     /* Mode 4, Pull-Down*/
+	P8_37_pwm_pin: pinmux_P8_37_pwm_pin {
+		pinctrl-single,pins = <0x0c0  0x02>; };     /* Mode 2, Pull-Down*/
+	P8_37_hdmi_pin: pinmux_P8_37_hdmi_pin {
+		pinctrl-single,pins = <0x0c0  0x08>; };     /* lcd_data8.lcd_data8, OMAP_MUX_MODE0 | AM33XX_PIN_OUTPUT | AM33XX_PULL_DISA */
+
+
+	/* P8_38 (ZCZ ball U2 ) hdmi    */
+	P8_38_default_pin: pinmux_P8_38_default_pin {
+		pinctrl-single,pins = <0x0c4  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_38_gpio_pin: pinmux_P8_38_gpio_pin {
+		pinctrl-single,pins = <0x0c4  0x2F>; };     /* Mode 7, RxActive */
+	P8_38_gpio_pu_pin: pinmux_P8_38_gpio_pu_pin {
+		pinctrl-single,pins = <0x0c4  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P8_38_gpio_pd_pin: pinmux_P8_38_gpio_pd_pin {
+		pinctrl-single,pins = <0x0c4  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_38_uart_pin: pinmux_P8_38_uart_pin {
+		pinctrl-single,pins = <0x0c4  0x24>; };     /* Mode 4, Pull-Down, RxActive */
+	P8_38_pwm_pin: pinmux_P8_38_pwm_pin {
+		pinctrl-single,pins = <0x0c4  0x22>; };     /* Mode 2, Pull-Down, RxActive */
+	P8_38_hdmi_pin: pinmux_P8_38_hdmi_pin {
+		pinctrl-single,pins = <0x0c4  0x08>; };     /* lcd_data9.lcd_data9, OMAP_MUX_MODE0 | AM33XX_PIN_OUTPUT | AM33XX_PULL_DISA */
+
+
+	/* P8_39 (ZCZ ball T3 ) hdmi    */
+	P8_39_default_pin: pinmux_P8_39_default_pin {
+		pinctrl-single,pins = <0x0b8  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_39_gpio_pin: pinmux_P8_39_gpio_pin {
+		pinctrl-single,pins = <0x0b8  0x2F>; };     /* Mode 7, RxActive */
+	P8_39_gpio_pu_pin: pinmux_P8_39_gpio_pu_pin {
+		pinctrl-single,pins = <0x0b8  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P8_39_gpio_pd_pin: pinmux_P8_39_gpio_pd_pin {
+		pinctrl-single,pins = <0x0b8  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_39_pruout_pin: pinmux_P8_39_pruout_pin {
+		pinctrl-single,pins = <0x0b8  0x05>; };     /* Mode 5, Pull-Down*/
+	P8_39_pruin_pin: pinmux_P8_39_pruin_pin {
+		pinctrl-single,pins = <0x0b8  0x26>; };     /* Mode 6, Pull-Down, RxActive */
+	P8_39_hdmi_pin: pinmux_P8_39_hdmi_pin {
+		pinctrl-single,pins = <0x0b8  0x08>; };     /* lcd_data6.lcd_data6, OMAP_MUX_MODE0 | AM33XX_PIN_OUTPUT | AM33XX_PULL_DISA */
+
+	/* P8_40 (ZCZ ball T4 ) hdmi    */
+	P8_40_default_pin: pinmux_P8_40_default_pin {
+		pinctrl-single,pins = <0x0bc  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_40_gpio_pin: pinmux_P8_40_gpio_pin {
+		pinctrl-single,pins = <0x0bc  0x2F>; };     /* Mode 7, RxActive */
+	P8_40_gpio_pu_pin: pinmux_P8_40_gpio_pu_pin {
+		pinctrl-single,pins = <0x0bc  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P8_40_gpio_pd_pin: pinmux_P8_40_gpio_pd_pin {
+		pinctrl-single,pins = <0x0bc  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_40_pruout_pin: pinmux_P8_40_pruout_pin {
+		pinctrl-single,pins = <0x0bc  0x05>; };     /* Mode 5, Pull-Down*/
+	P8_40_pruin_pin: pinmux_P8_40_pruin_pin {
+		pinctrl-single,pins = <0x0bc  0x26>; };     /* Mode 6, Pull-Down, RxActive */
+	P8_40_hdmi_pin: pinmux_P8_40_hdmi_pin {
+		pinctrl-single,pins = <0x0bc  0x08>; };     /* lcd_data7.lcd_data7, OMAP_MUX_MODE0 | AM33XX_PIN_OUTPUT | AM33XX_PULL_DISA */
+
+	/* P8_41 (ZCZ ball T1 ) hdmi    */
+	P8_41_default_pin: pinmux_P8_41_default_pin {
+		pinctrl-single,pins = <0x0b0  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_41_gpio_pin: pinmux_P8_41_gpio_pin {
+		pinctrl-single,pins = <0x0b0  0x2F>; };     /* Mode 7, RxActive */
+	P8_41_gpio_pu_pin: pinmux_P8_41_gpio_pu_pin {
+		pinctrl-single,pins = <0x0b0  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P8_41_gpio_pd_pin: pinmux_P8_41_gpio_pd_pin {
+		pinctrl-single,pins = <0x0b0  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_41_pruout_pin: pinmux_P8_41_pruout_pin {
+		pinctrl-single,pins = <0x0b0  0x05>; };     /* Mode 5, Pull-Down*/
+	P8_41_pruin_pin: pinmux_P8_41_pruin_pin {
+		pinctrl-single,pins = <0x0b0  0x26>; };     /* Mode 6, Pull-Down, RxActive */
+	P8_41_hdmi_pin: pinmux_P8_41_hdmi_pin {
+		pinctrl-single,pins = <0x0b0  0x08>; };     /* lcd_data4.lcd_data4, OMAP_MUX_MODE0 | AM33XX_PIN_OUTPUT | AM33XX_PULL_DISA */
+
+	/* P8_42 (ZCZ ball T2 ) hdmi    */
+	P8_42_default_pin: pinmux_P8_42_default_pin {
+		pinctrl-single,pins = <0x0b4  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_42_gpio_pin: pinmux_P8_42_gpio_pin {
+		pinctrl-single,pins = <0x0b4  0x2F>; };     /* Mode 7, RxActive */
+	P8_42_gpio_pu_pin: pinmux_P8_42_gpio_pu_pin {
+		pinctrl-single,pins = <0x0b4  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P8_42_gpio_pd_pin: pinmux_P8_42_gpio_pd_pin {
+		pinctrl-single,pins = <0x0b4  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_42_pruout_pin: pinmux_P8_42_pruout_pin {
+		pinctrl-single,pins = <0x0b4  0x05>; };     /* Mode 5, Pull-Down*/
+	P8_42_pruin_pin: pinmux_P8_42_pruin_pin {
+		pinctrl-single,pins = <0x0b4  0x26>; };     /* Mode 6, Pull-Down, RxActive */
+	P8_42_hdmi_pin: pinmux_P8_42_hdmi_pin {
+		pinctrl-single,pins = <0x0b4  0x08>; };     /* lcd_data5.lcd_data5, OMAP_MUX_MODE0 | AM33XX_PIN_OUTPUT | AM33XX_PULL_DISA */
+
+	/* P8_43 (ZCZ ball R3 ) hdmi    */
+	P8_43_default_pin: pinmux_P8_43_default_pin {
+		pinctrl-single,pins = <0x0a8  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_43_gpio_pin: pinmux_P8_43_gpio_pin {
+		pinctrl-single,pins = <0x0a8  0x2F>; };     /* Mode 7, RxActive */
+	P8_43_gpio_pu_pin: pinmux_P8_43_gpio_pu_pin {
+		pinctrl-single,pins = <0x0a8  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P8_43_gpio_pd_pin: pinmux_P8_43_gpio_pd_pin {
+		pinctrl-single,pins = <0x0a8  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_43_pruout_pin: pinmux_P8_43_pruout_pin {
+		pinctrl-single,pins = <0x0a8  0x05>; };     /* Mode 5, Pull-Down*/
+	P8_43_pruin_pin: pinmux_P8_43_pruin_pin {
+		pinctrl-single,pins = <0x0a8  0x26>; };     /* Mode 6, Pull-Down, RxActive */
+	P8_43_pwm_pin: pinmux_P8_43_pwm_pin {
+		pinctrl-single,pins = <0x0a8  0x03>; };     /* Mode 3, Pull-Down  */
+	P8_43_hdmi_pin: pinmux_P8_43_hdmi_pin {
+		pinctrl-single,pins = <0x0a8  0x08>; };     /* lcd_data2.lcd_data2, OMAP_MUX_MODE0 | AM33XX_PIN_OUTPUT | AM33XX_PULL_DISA */
+
+	/* P8_44 (ZCZ ball R4 ) hdmi    */
+	P8_44_default_pin: pinmux_P8_44_default_pin {
+		pinctrl-single,pins = <0x0ac  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_44_gpio_pin: pinmux_P8_44_gpio_pin {
+		pinctrl-single,pins = <0x0ac  0x2F>; };     /* Mode 7, RxActive */
+	P8_44_gpio_pu_pin: pinmux_P8_44_gpio_pu_pin {
+		pinctrl-single,pins = <0x0ac  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P8_44_gpio_pd_pin: pinmux_P8_44_gpio_pd_pin {
+		pinctrl-single,pins = <0x0ac  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_44_pruout_pin: pinmux_P8_44_pruout_pin {
+		pinctrl-single,pins = <0x0ac  0x05>; };     /* Mode 5, Pull-Down*/
+	P8_44_pruin_pin: pinmux_P8_44_pruin_pin {
+		pinctrl-single,pins = <0x0ac  0x26>; };     /* Mode 6, Pull-Down, RxActive */
+	P8_44_pwm_pin: pinmux_P8_44_pwm_pin {
+		pinctrl-single,pins = <0x0ac  0x23>; };     /* Mode 3, Pull-Down, RxActive */
+	P8_44_hdmi_pin: pinmux_P8_44_hdmi_pin {
+		pinctrl-single,pins = <0x0ac  0x08>; };     /* lcd_data3.lcd_data3, OMAP_MUX_MODE0 | AM33XX_PIN_OUTPUT | AM33XX_PULL_DISA */
+
+	/* P8_45 (ZCZ ball R1 ) hdmi    */
+	P8_45_default_pin: pinmux_P8_45_default_pin {
+		pinctrl-single,pins = <0x0a0  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_45_gpio_pin: pinmux_P8_45_gpio_pin {
+		pinctrl-single,pins = <0x0a0  0x2F>; };     /* Mode 7, RxActive */
+	P8_45_gpio_pu_pin: pinmux_P8_45_gpio_pu_pin {
+		pinctrl-single,pins = <0x0a0  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P8_45_gpio_pd_pin: pinmux_P8_45_gpio_pd_pin {
+		pinctrl-single,pins = <0x0a0  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_45_pruout_pin: pinmux_P8_45_pruout_pin {
+		pinctrl-single,pins = <0x0a0  0x05>; };     /* Mode 5, Pull-Down*/
+	P8_45_pruin_pin: pinmux_P8_45_pruin_pin {
+		pinctrl-single,pins = <0x0a0  0x26>; };     /* Mode 6, Pull-Down, RxActive */
+	P8_45_pwm_pin: pinmux_P8_45_pwm_pin {
+		pinctrl-single,pins = <0x0a0  0x03>; };     /* Mode 3, Pull-Down*/
+	P8_45_hdmi_pin: pinmux_P8_45_hdmi_pin {
+		pinctrl-single,pins = <0x0a0  0x08>; };     /* lcd_data0.lcd_data0, OMAP_MUX_MODE0 | AM33XX_PIN_OUTPUT | AM33XX_PULL_DISA */
+
+	/* P8_46 (ZCZ ball R2 ) hdmi    */
+	P8_46_default_pin: pinmux_P8_46_default_pin {
+		pinctrl-single,pins = <0x0a4  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_46_gpio_pin: pinmux_P8_46_gpio_pin {
+		pinctrl-single,pins = <0x0a4  0x2F>; };     /* Mode 7, RxActive */
+	P8_46_gpio_pu_pin: pinmux_P8_46_gpio_pu_pin {
+		pinctrl-single,pins = <0x0a4  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P8_46_gpio_pd_pin: pinmux_P8_46_gpio_pd_pin {
+		pinctrl-single,pins = <0x0a4  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P8_46_pruout_pin: pinmux_P8_46_pruout_pin {
+		pinctrl-single,pins = <0x0a4  0x05>; };     /* Mode 5, Pull-Down*/
+	P8_46_pruin_pin: pinmux_P8_46_pruin_pin {
+		pinctrl-single,pins = <0x0a4  0x26>; };     /* Mode 6, Pull-Down, RxActive */
+	P8_46_pwm_pin: pinmux_P8_46_pwm_pin {
+		pinctrl-single,pins = <0x0a4  0x03>; };     /* Mode 3, Pull-Down*/
+	P8_46_hdmi_pin: pinmux_P8_46_hdmi_pin {
+		pinctrl-single,pins = <0x0a4  0x08>; };     /* lcd_data1.lcd_data1, OMAP_MUX_MODE0 | AM33XX_PIN_OUTPUT | AM33XX_PULL_DISA */
+
+	/************************/
+	/* P9 Header            */
+	/************************/
+
+	/* P9_01                GND     */
+	/* P9_02                GND     */
+	/* P9_03                3.3V    */
+	/* P9_04                3.3V    */
+	/* P9_05                VDD_5V  */
+	/* P9_06                VDD_5V  */
+	/* P9_07                SYS_5V  */
+	/* P9_08                SYS_5V  */
+	/* P9_09                PWR_BUT */
+	/* P9_10 (ZCZ ball A10) RESETn  */
+
+	/* P9_11 (ZCZ ball T17) */
+	P9_11_default_pin: pinmux_P9_11_default_pin {
+		pinctrl-single,pins = <0x070  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P9_11_gpio_pin: pinmux_P9_11_gpio_pin {
+		pinctrl-single,pins = <0x070  0x2F>; };     /* Mode 7, RxActive */
+	P9_11_gpio_pu_pin: pinmux_P9_11_gpio_pu_pin {
+		pinctrl-single,pins = <0x070  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P9_11_gpio_pd_pin: pinmux_P9_11_gpio_pd_pin {
+		pinctrl-single,pins = <0x070  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P9_11_uart_pin: pinmux_P9_11_uart_pin {
+		pinctrl-single,pins = <0x070  0x36>; };     /* Mode 6, Pull-Up, RxActive */
+
+	/* P9_12 (ZCZ ball U18) */
+	P9_12_default_pin: pinmux_P9_12_default_pin {
+		pinctrl-single,pins = <0x078  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P9_12_gpio_pin: pinmux_P9_12_gpio_pin {
+		pinctrl-single,pins = <0x078  0x2F>; };     /* Mode 7, RxActive */
+	P9_12_gpio_pu_pin: pinmux_P9_12_gpio_pu_pin {
+		pinctrl-single,pins = <0x078  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P9_12_gpio_pd_pin: pinmux_P9_12_gpio_pd_pin {
+		pinctrl-single,pins = <0x078  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+
+	/* P9_13 (ZCZ ball U17) */
+	P9_13_default_pin: pinmux_P9_13_default_pin {
+		pinctrl-single,pins = <0x074  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P9_13_gpio_pin: pinmux_P9_13_gpio_pin {
+		pinctrl-single,pins = <0x074  0x2F>; };     /* Mode 7, RxActive */
+	P9_13_gpio_pu_pin: pinmux_P9_13_gpio_pu_pin {
+		pinctrl-single,pins = <0x074  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P9_13_gpio_pd_pin: pinmux_P9_13_gpio_pd_pin {
+		pinctrl-single,pins = <0x074  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P9_13_uart_pin: pinmux_P9_13_uart_pin {
+		pinctrl-single,pins = <0x074  0x36>; };     /* Mode 6, Pull-Up, RxActive */
+
+	/* P9_14 (ZCZ ball U14) */
+	P9_14_default_pin: pinmux_P9_14_default_pin {
+		pinctrl-single,pins = <0x048  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P9_14_gpio_pin: pinmux_P9_14_gpio_pin {
+		pinctrl-single,pins = <0x048  0x2F>; };     /* Mode 7, RxActive */
+	P9_14_gpio_pu_pin: pinmux_P9_14_gpio_pu_pin {
+		pinctrl-single,pins = <0x048  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P9_14_gpio_pd_pin: pinmux_P9_14_gpio_pd_pin {
+		pinctrl-single,pins = <0x048  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P9_14_pwm_pin: pinmux_P9_14_pwm_pin {
+		pinctrl-single,pins = <0x048  0x26>; };     /* Mode 6, Pull-Down, RxActive */
+
+	/* P9_15 (ZCZ ball R13) */
+	P9_15_default_pin: pinmux_P9_15_default_pin {
+		pinctrl-single,pins = <0x040  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P9_15_gpio_pin: pinmux_P9_15_gpio_pin {
+		pinctrl-single,pins = <0x040  0x2F>; };     /* Mode 7, RxActive */
+	P9_15_gpio_pu_pin: pinmux_P9_15_gpio_pu_pin {
+		pinctrl-single,pins = <0x040  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P9_15_gpio_pd_pin: pinmux_P9_15_gpio_pd_pin {
+		pinctrl-single,pins = <0x040  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P9_15_pwm_pin: pinmux_P9_15_pwm_pin {
+		pinctrl-single,pins = <0x040  0x26>; };     /* Mode 6, Pull-Down, RxActive */
+
+	/* P9_16 (ZCZ ball T14) */
+	P9_16_default_pin: pinmux_P9_16_default_pin {
+		pinctrl-single,pins = <0x04c  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P9_16_gpio_pin: pinmux_P9_16_gpio_pin {
+		pinctrl-single,pins = <0x04c  0x2F>; };     /* Mode 7, RxActive */
+	P9_16_gpio_pu_pin: pinmux_P9_16_gpio_pu_pin {
+		pinctrl-single,pins = <0x04c  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P9_16_gpio_pd_pin: pinmux_P9_16_gpio_pd_pin {
+		pinctrl-single,pins = <0x04c  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P9_16_pwm_pin: pinmux_P9_16_pwm_pin {
+		pinctrl-single,pins = <0x04c  0x26>; };     /* Mode 6, Pull-Down, RxActive */
+
+	/* P9_17 (ZCZ ball A16) */
+	P9_17_default_pin: pinmux_P9_17_default_pin {
+		pinctrl-single,pins = <0x15c  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P9_17_gpio_pin: pinmux_P9_17_gpio_pin {
+		pinctrl-single,pins = <0x15c  0x2F>; };     /* Mode 7, RxActive */
+	P9_17_gpio_pu_pin: pinmux_P9_17_gpio_pu_pin {
+		pinctrl-single,pins = <0x15c  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P9_17_gpio_pd_pin: pinmux_P9_17_gpio_pd_pin {
+		pinctrl-single,pins = <0x15c  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P9_17_spi_pin: pinmux_P9_17_spi_pin {
+		pinctrl-single,pins = <0x15c  0x30>; };     /* Mode 0, Pull-Up, RxActive */
+	P9_17_i2c_pin: pinmux_P9_17_i2c_pin {
+		pinctrl-single,pins = <0x15c  0x32>; };     /* Mode 2, Pull-Up, RxActive */
+	P9_17_pwm_pin: pinmux_P9_17_pwm_pin {
+		pinctrl-single,pins = <0x15c  0x33>; };     /* Mode 3, Pull-Up, RxActive */
+
+	/* P9_18 (ZCZ ball B16) */
+	P9_18_default_pin: pinmux_P9_18_default_pin {
+		pinctrl-single,pins = <0x158  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P9_18_gpio_pin: pinmux_P9_18_gpio_pin {
+		pinctrl-single,pins = <0x158  0x2F>; };     /* Mode 7, RxActive */
+	P9_18_gpio_pu_pin: pinmux_P9_18_gpio_pu_pin {
+		pinctrl-single,pins = <0x158  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P9_18_gpio_pd_pin: pinmux_P9_18_gpio_pd_pin {
+		pinctrl-single,pins = <0x158  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P9_18_spi_pin: pinmux_P9_18_spi_pin {
+		pinctrl-single,pins = <0x158  0x30>; };     /* Mode 0, Pull-Up, RxActive */
+	P9_18_i2c_pin: pinmux_P9_18_i2c_pin {
+		pinctrl-single,pins = <0x158  0x32>; };     /* Mode 2, Pull-Up, RxActive */
+	P9_18_pwm_pin: pinmux_P9_18_pwm_pin {
+		pinctrl-single,pins = <0x158  0x33>; };     /* Mode 3, Pull-Up, RxActive */
+
+	/* P9_19 (ZCZ ball D17) */
+	P9_19_default_pin: pinmux_P9_19_default_pin {
+		pinctrl-single,pins = <0x17c  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P9_19_gpio_pin: pinmux_P9_19_gpio_pin {
+		pinctrl-single,pins = <0x17c  0x2F>; };     /* Mode 7, RxActive */
+	P9_19_gpio_pu_pin: pinmux_P9_19_gpio_pu_pin {
+		pinctrl-single,pins = <0x17c  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P9_19_gpio_pd_pin: pinmux_P9_19_gpio_pd_pin {
+		pinctrl-single,pins = <0x17c  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P9_19_can_pin: pinmux_P9_19_can_pin {
+		pinctrl-single,pins = <0x17c  0x32>; };     /* Mode 2, Pull-Up, RxActive */
+	P9_19_i2c_pin: pinmux_P9_19_i2c_pin {
+		pinctrl-single,pins = <0x17c  0x73>; };     /* (SLEWCTRL_SLOW | PIN_INPUT_PULLUP | MUX_MODE3) */
+
+	/* P9_20 (ZCZ ball D18) */
+	P9_20_default_pin: pinmux_P9_20_default_pin {
+		pinctrl-single,pins = <0x178  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P9_20_gpio_pin: pinmux_P9_20_gpio_pin {
+		pinctrl-single,pins = <0x178  0x2F>; };     /* Mode 7, RxActive */
+	P9_20_gpio_pu_pin: pinmux_P9_20_gpio_pu_pin {
+		pinctrl-single,pins = <0x178  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P9_20_gpio_pd_pin: pinmux_P9_20_gpio_pd_pin {
+		pinctrl-single,pins = <0x178  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P9_20_can_pin: pinmux_P9_20_can_pin {
+		pinctrl-single,pins = <0x178  0x12>; };     /* Mode 2, Pull-Up, RxActive */
+	P9_20_i2c_pin: pinmux_P9_20_i2c_pin {
+		pinctrl-single,pins = <0x178  0x73>; };     /* (SLEWCTRL_SLOW | PIN_INPUT_PULLUP | MUX_MODE3) */
+
+	/* P9_21 (ZCZ ball B17) */
+	P9_21_default_pin: pinmux_P9_21_default_pin {
+		pinctrl-single,pins = <0x154  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P9_21_gpio_pin: pinmux_P9_21_gpio_pin {
+		pinctrl-single,pins = <0x154  0x2F>; };     /* Mode 7, RxActive */
+	P9_21_gpio_pu_pin: pinmux_P9_21_gpio_pu_pin {
+		pinctrl-single,pins = <0x154  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P9_21_gpio_pd_pin: pinmux_P9_21_gpio_pd_pin {
+		pinctrl-single,pins = <0x154  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P9_21_spi_pin: pinmux_P9_21_spi_pin {
+		pinctrl-single,pins = <0x154  0x30>; };     /* Mode 0, Pull-Up, RxActive */
+	P9_21_uart_pin: pinmux_P9_21_uart_pin {
+		pinctrl-single,pins = <0x154  0x31>; };     /* Mode 1, Pull-Up, RxActive */
+	P9_21_i2c_pin: pinmux_P9_21_i2c_pin {
+		pinctrl-single,pins = <0x154  0x32>; };     /* Mode 2, Pull-Up, RxActive */
+	P9_21_pwm_pin: pinmux_P9_21_pwm_pin {
+		pinctrl-single,pins = <0x154  0x33>; };     /* Mode 3, Pull-Up, RxActive */
+
+	/* P9_22 (ZCZ ball A17) */
+	P9_22_default_pin: pinmux_P9_22_default_pin {
+		pinctrl-single,pins = <0x150  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P9_22_gpio_pin: pinmux_P9_22_gpio_pin {
+		pinctrl-single,pins = <0x150  0x2F>; };     /* Mode 7, RxActive */
+	P9_22_gpio_pu_pin: pinmux_P9_22_gpio_pu_pin {
+		pinctrl-single,pins = <0x150  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P9_22_gpio_pd_pin: pinmux_P9_22_gpio_pd_pin {
+		pinctrl-single,pins = <0x150  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P9_22_spi_pin: pinmux_P9_22_spi_pin {
+		pinctrl-single,pins = <0x150  0x30>; };     /* Mode 0, Pull-Up, RxActive */
+	P9_22_uart_pin: pinmux_P9_22_uart_pin {
+		pinctrl-single,pins = <0x150  0x31>; };     /* Mode 1, Pull-Up, RxActive */
+	P9_22_i2c_pin: pinmux_P9_22_i2c_pin {
+		pinctrl-single,pins = <0x150  0x32>; };     /* Mode 2, Pull-Up, RxActive */
+	P9_22_pwm_pin: pinmux_P9_22_pwm_pin {
+		pinctrl-single,pins = <0x150  0x33>; };     /* Mode 3, Pull-Up, RxActive */
+
+	/* P9_23 (ZCZ ball V14) */
+	P9_23_default_pin: pinmux_P9_23_default_pin {
+		pinctrl-single,pins = <0x044  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P9_23_gpio_pin: pinmux_P9_23_gpio_pin {
+		pinctrl-single,pins = <0x044  0x2F>; };     /* Mode 7, RxActive */
+	P9_23_gpio_pu_pin: pinmux_P9_23_gpio_pu_pin {
+		pinctrl-single,pins = <0x044  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P9_23_gpio_pd_pin: pinmux_P9_23_gpio_pd_pin {
+		pinctrl-single,pins = <0x044  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P9_23_pwm_pin: pinmux_P9_23_pwm_pin {
+		pinctrl-single,pins = <0x044  0x26>; };     /* Mode 6, Pull-Down, RxActive */
+
+	/* P9_24 (ZCZ ball D15) */
+	P9_24_default_pin: pinmux_P9_24_default_pin {
+		pinctrl-single,pins = <0x184  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P9_24_gpio_pin: pinmux_P9_24_gpio_pin {
+		pinctrl-single,pins = <0x184  0x2F>; };     /* Mode 7, RxActive */
+	P9_24_gpio_pu_pin: pinmux_P9_24_gpio_pu_pin {
+		pinctrl-single,pins = <0x184  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P9_24_gpio_pd_pin: pinmux_P9_24_gpio_pd_pin {
+		pinctrl-single,pins = <0x184  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P9_24_uart_pin: pinmux_P9_24_uart_pin {
+		pinctrl-single,pins = <0x184  0x30>; };     /* Mode 0, Pull-Up, RxActive */
+	P9_24_can_pin: pinmux_P9_24_can_pin {
+		pinctrl-single,pins = <0x184  0x32>; };     /* Mode 2, Pull-Up, RxActive */
+	P9_24_i2c_pin: pinmux_P9_24_i2c_pin {
+		pinctrl-single,pins = <0x184  0x33>; };     /* Mode 3, Pull-Up, RxActive */
+	P9_24_pruin_pin: pinmux_P9_24_pruin_pin {
+		pinctrl-single,pins = <0x184  0x36>; };     /* Mode 6, Pull-Up, RxActive */
+
+	/* P9_25 (ZCZ ball A14) Audio   */
+	P9_25_default_pin: pinmux_P9_25_default_pin {
+		pinctrl-single,pins = <0x1ac  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P9_25_gpio_pin: pinmux_P9_25_gpio_pin {
+		pinctrl-single,pins = <0x1ac  0x2F>; };     /* Mode 7, RxActive */
+	P9_25_gpio_pu_pin: pinmux_P9_25_gpio_pu_pin {
+		pinctrl-single,pins = <0x1ac  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P9_25_gpio_pd_pin: pinmux_P9_25_gpio_pd_pin {
+		pinctrl-single,pins = <0x1ac  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P9_25_qep_pin: pinmux_P9_25_qep_pin {
+		pinctrl-single,pins = <0x1ac  0x21>; };     /* Mode 1, Pull-Down, RxActive */
+	P9_25_pruout_pin: pinmux_P9_25_pruout_pin {
+		pinctrl-single,pins = <0x1ac  0x25>; };     /* Mode 5, Pull-Down, RxActive */
+	P9_25_pruin_pin: pinmux_P9_25_pruin_pin {
+		pinctrl-single,pins = <0x1ac  0x26>; };     /* Mode 6, Pull-Down, RxActive */
+	P9_25_audio_pin: pinmux_P9_25_audio_pin {
+		pinctrl-single,pins = <0x1ac  (PIN_INPUT_PULLUP | MUX_MODE0)>; };	/* mcasp0_ahclkx.mcasp0_ahclkx */
+
+	/* P9_26 (ZCZ ball D16) */
+	P9_26_default_pin: pinmux_P9_26_default_pin {
+		pinctrl-single,pins = <0x180  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P9_26_gpio_pin: pinmux_P9_26_gpio_pin {
+		pinctrl-single,pins = <0x180  0x2F>; };     /* Mode 7, RxActive */
+	P9_26_gpio_pu_pin: pinmux_P9_26_gpio_pu_pin {
+		pinctrl-single,pins = <0x180  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P9_26_gpio_pd_pin: pinmux_P9_26_gpio_pd_pin {
+		pinctrl-single,pins = <0x180  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P9_26_uart_pin: pinmux_P9_26_uart_pin {
+		pinctrl-single,pins = <0x180  0x30>; };     /* Mode 0, Pull-Up, RxActive */
+	P9_26_can_pin: pinmux_P9_26_can_pin {
+		pinctrl-single,pins = <0x180  0x12>; };     /* Mode 2, Pull-Up, RxActive */
+	P9_26_i2c_pin: pinmux_P9_26_i2c_pin {
+		pinctrl-single,pins = <0x180  0x33>; };     /* Mode 3, Pull-Up, RxActive */
+	P9_26_pruin_pin: pinmux_P9_26_pruin_pin {
+		pinctrl-single,pins = <0x180  0x36>; };     /* Mode 6, Pull-Up, RxActive */
+
+	/* P9_27 (ZCZ ball C13) */
+	P9_27_default_pin: pinmux_P9_27_default_pin {
+		pinctrl-single,pins = <0x1a4  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P9_27_gpio_pin: pinmux_P9_27_gpio_pin {
+		pinctrl-single,pins = <0x1a4  0x2F>; };     /* Mode 7, RxActive */
+	P9_27_gpio_pu_pin: pinmux_P9_27_gpio_pu_pin {
+		pinctrl-single,pins = <0x1a4  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P9_27_gpio_pd_pin: pinmux_P9_27_gpio_pd_pin {
+		pinctrl-single,pins = <0x1a4  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P9_27_qep_pin: pinmux_P9_27_qep_pin {
+		pinctrl-single,pins = <0x1a4  0x21>; };     /* Mode 1, Pull-Down, RxActive */
+	P9_27_pruout_pin: pinmux_P9_27_pruout_pin {
+		pinctrl-single,pins = <0x1a4  0x25>; };     /* Mode 5, Pull-Down, RxActive */
+	P9_27_pruin_pin: pinmux_P9_27_pruin_pin {
+		pinctrl-single,pins = <0x1a4  0x26>; };     /* Mode 6, Pull-Down, RxActive */
+
+	/* P9_28 (ZCZ ball C12) Audio   */
+	P9_28_default_pin: pinmux_P9_28_default_pin {
+		pinctrl-single,pins = <0x19c  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P9_28_gpio_pin: pinmux_P9_28_gpio_pin {
+		pinctrl-single,pins = <0x19c  0x2F>; };     /* Mode 7, RxActive */
+	P9_28_gpio_pu_pin: pinmux_P9_28_gpio_pu_pin {
+		pinctrl-single,pins = <0x19c  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P9_28_gpio_pd_pin: pinmux_P9_28_gpio_pd_pin {
+		pinctrl-single,pins = <0x19c  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P9_28_pwm_pin: pinmux_P9_28_pwm_pin {
+		pinctrl-single,pins = <0x19c  0x21>; };     /* Mode 1, Pull-Down, RxActive */
+	P9_28_spi_pin: pinmux_P9_28_spi_pin {
+		pinctrl-single,pins = <0x19c  0x23>; };     /* Mode 3, Pull-Down, RxActive */
+	P9_28_pwm2_pin: pinmux_P9_28_pwm2_pin {
+		pinctrl-single,pins = <0x19c  0x24>; };     /* Mode 4, Pull-Down, RxActive */
+	P9_28_pruout_pin: pinmux_P9_28_pruout_pin {
+		pinctrl-single,pins = <0x19c  0x25>; };     /* Mode 5, Pull-Down, RxActive */
+	P9_28_pruin_pin: pinmux_P9_28_pruin_pin {
+		pinctrl-single,pins = <0x19c  0x26>; };     /* Mode 6, Pull-Down, RxActive */
+	P9_28_audio_pin: pinmux_P9_28_audio_pin {
+		pinctrl-single,pins = <0x19c  (PIN_OUTPUT_PULLDOWN | MUX_MODE2)>; };	/* mcasp0_ahclkr.mcasp0_axr2 */
+
+	/* P9_29 (ZCZ ball B13) Audio   */
+	P9_29_default_pin: pinmux_P9_29_default_pin {
+		pinctrl-single,pins = <0x194  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P9_29_gpio_pin: pinmux_P9_29_gpio_pin {
+		pinctrl-single,pins = <0x194  0x2F>; };     /* Mode 7, RxActive */
+	P9_29_gpio_pu_pin: pinmux_P9_29_gpio_pu_pin {
+		pinctrl-single,pins = <0x194  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P9_29_gpio_pd_pin: pinmux_P9_29_gpio_pd_pin {
+		pinctrl-single,pins = <0x194  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P9_29_pwm_pin: pinmux_P9_29_pwm_pin {
+		pinctrl-single,pins = <0x194  0x21>; };     /* Mode 1, Pull-Down, RxActive */
+	P9_29_spi_pin: pinmux_P9_29_spi_pin {
+		pinctrl-single,pins = <0x194  0x23>; };     /* Mode 3, Pull-Down, RxActive */
+	P9_29_pruout_pin: pinmux_P9_29_pruout_pin {
+		pinctrl-single,pins = <0x194  0x25>; };     /* Mode 5, Pull-Down, RxActive */
+	P9_29_pruin_pin: pinmux_P9_29_pruin_pin {
+		pinctrl-single,pins = <0x194  0x26>; };     /* Mode 6, Pull-Down, RxActive */
+	P9_29_audio_pin: pinmux_P9_29_audio_pin {
+		pinctrl-single,pins = <0x194  (PIN_OUTPUT_PULLUP | MUX_MODE0)>; };	/* mcasp0_fsx.mcasp0_fsx */
+
+	/* P9_30 (ZCZ ball D12) */
+	P9_30_default_pin: pinmux_P9_30_default_pin {
+		pinctrl-single,pins = <0x198  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P9_30_gpio_pin: pinmux_P9_30_gpio_pin {
+		pinctrl-single,pins = <0x198  0x2F>; };     /* Mode 7, RxActive */
+	P9_30_gpio_pu_pin: pinmux_P9_30_gpio_pu_pin {
+		pinctrl-single,pins = <0x198  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P9_30_gpio_pd_pin: pinmux_P9_30_gpio_pd_pin {
+		pinctrl-single,pins = <0x198  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P9_30_pwm_pin: pinmux_P9_30_pwm_pin {
+		pinctrl-single,pins = <0x198  0x21>; };     /* Mode 1, Pull-Down, RxActive */
+	P9_30_spi_pin: pinmux_P9_30_spi_pin {
+		pinctrl-single,pins = <0x198  0x23>; };     /* Mode 3, Pull-Down, RxActive */
+	P9_30_pruout_pin: pinmux_P9_30_pruout_pin {
+		pinctrl-single,pins = <0x198  0x25>; };     /* Mode 5, Pull-Down, RxActive */
+	P9_30_pruin_pin: pinmux_P9_30_pruin_pin {
+		pinctrl-single,pins = <0x198  0x26>; };     /* Mode 6, Pull-Down, RxActive */
+
+	/* P9_31 (ZCZ ball A13) Audio   */
+	P9_31_default_pin: pinmux_P9_31_default_pin {
+		pinctrl-single,pins = <0x190  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P9_31_gpio_pin: pinmux_P9_31_gpio_pin {
+		pinctrl-single,pins = <0x190  0x2F>; };     /* Mode 7, RxActive */
+	P9_31_gpio_pu_pin: pinmux_P9_31_gpio_pu_pin {
+		pinctrl-single,pins = <0x190  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P9_31_gpio_pd_pin: pinmux_P9_31_gpio_pd_pin {
+		pinctrl-single,pins = <0x190  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P9_31_pwm_pin: pinmux_P9_31_pwm_pin {
+		pinctrl-single,pins = <0x190  0x21>; };     /* Mode 1, Pull-Down, RxActive */
+	P9_31_spi_pin: pinmux_P9_31_spi_pin {
+		pinctrl-single,pins = <0x190  0x23>; };     /* Mode 3, Pull-Down, RxActive */
+	P9_31_pruout_pin: pinmux_P9_31_pruout_pin {
+		pinctrl-single,pins = <0x190  0x25>; };     /* Mode 5, Pull-Down, RxActive */
+	P9_31_pruin_pin: pinmux_P9_31_pruin_pin {
+		pinctrl-single,pins = <0x190  0x26>; };     /* Mode 6, Pull-Down, RxActive */
+	P9_31_audio_pin: pinmux_P9_31_audio_pin {
+		pinctrl-single,pins = <0x190  (PIN_OUTPUT_PULLDOWN | MUX_MODE0)>; };	/* mcasp0_aclkx.mcasp0_aclkx */
+
+	/* P9_32                VADC    */
+	/* P9_33 (ZCZ ball C8 ) AIN4    */
+	/* P9_34                AGND    */
+	/* P9_35 (ZCZ ball A8 ) AIN6    */
+	/* P9_36 (ZCZ ball B8 ) AIN5    */
+	/* P9_37 (ZCZ ball B7 ) AIN2    */
+	/* P9_38 (ZCZ ball A7 ) AIN3    */
+	/* P9_39 (ZCZ ball B6 ) AIN0    */
+	/* P9_40 (ZCZ ball C7 ) AIN1    */
+
+	/* P9_41 (ZCZ ball D14) */
+	P9_41_default_pin: pinmux_P9_41_default_pin {
+		pinctrl-single,pins = <0x1b4  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P9_41_gpio_pin: pinmux_P9_41_gpio_pin {
+		pinctrl-single,pins = <0x1b4  0x2F>; };     /* Mode 7, RxActive */
+	P9_41_gpio_pu_pin: pinmux_P9_41_gpio_pu_pin {
+		pinctrl-single,pins = <0x1b4  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P9_41_gpio_pd_pin: pinmux_P9_41_gpio_pd_pin {
+		pinctrl-single,pins = <0x1b4  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P9_41_timer_pin: pinmux_P9_41_timer_pin {
+		pinctrl-single,pins = <0x1b4  0x24>; };     /* Mode 4, Pull-Down, RxActive */
+	P9_41_pruin_pin: pinmux_P9_41_pruin_pin {
+		pinctrl-single,pins = <0x1b4  0x25>; };     /* Mode 5, Pull-Down, RxActive */
+
+	/* P9_41.1              */
+	/* P9_91 (ZCZ ball D13) */
+	P9_91_default_pin: pinmux_P9_91_default_pin {
+		pinctrl-single,pins = <0x1a8  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P9_91_gpio_pin: pinmux_P9_91_gpio_pin {
+		pinctrl-single,pins = <0x1a8  0x2F>; };     /* Mode 7, RxActive */
+	P9_91_gpio_pu_pin: pinmux_P9_91_gpio_pu_pin {
+		pinctrl-single,pins = <0x1a8  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P9_91_gpio_pd_pin: pinmux_P9_91_gpio_pd_pin {
+		pinctrl-single,pins = <0x1a8  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P9_91_qep_pin: pinmux_P9_91_qep_pin {
+		pinctrl-single,pins = <0x1a8  0x21>; };     /* Mode 1, Pull-Down, RxActive */
+	P9_91_pruout_pin: pinmux_P9_91_pruout_pin {
+		pinctrl-single,pins = <0x1a8  0x25>; };     /* Mode 5, Pull-Down, RxActive */
+	P9_91_pruin_pin: pinmux_P9_91_pruin_pin {
+		pinctrl-single,pins = <0x1a8  0x26>; };     /* Mode 6, Pull-Down, RxActive */
+
+	/* P9_42 (ZCZ ball C18) */
+	P9_42_default_pin: pinmux_P9_42_default_pin {
+		pinctrl-single,pins = <0x164  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P9_42_gpio_pin: pinmux_P9_42_gpio_pin {
+		pinctrl-single,pins = <0x164  0x2F>; };     /* Mode 7, RxActive */
+	P9_42_gpio_pu_pin: pinmux_P9_42_gpio_pu_pin {
+		pinctrl-single,pins = <0x164  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P9_42_gpio_pd_pin: pinmux_P9_42_gpio_pd_pin {
+		pinctrl-single,pins = <0x164  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P9_42_pwm_pin: pinmux_P9_42_pwm_pin {
+		pinctrl-single,pins = <0x164  0x20>; };     /* Mode 0, Pull-Down, RxActive */
+	P9_42_uart_pin: pinmux_P9_42_uart_pin {
+		pinctrl-single,pins = <0x164  0x21>; };     /* Mode 1, Pull-Down, RxActive */
+	P9_42_spics_pin: pinmux_P9_42_spics_pin {
+		pinctrl-single,pins = <0x164  0x22>; };     /* Mode 2, Pull-Down, RxActive */
+	P9_42_spiclk_pin: pinmux_P9_42_spiclk_pin {
+		pinctrl-single,pins = <0x164  0x24>; };     /* Mode 4, Pull-Down, RxActive */
+
+	/* P9_42.1              */
+	/* P9_92 (ZCZ ball B12) */
+	P9_92_default_pin: pinmux_P9_92_default_pin {
+		pinctrl-single,pins = <0x1a0  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P9_92_gpio_pin: pinmux_P9_92_gpio_pin {
+		pinctrl-single,pins = <0x1a0  0x2F>; };     /* Mode 7, RxActive */
+	P9_92_gpio_pu_pin: pinmux_P9_92_gpio_pu_pin {
+		pinctrl-single,pins = <0x1a0  0x37>; };     /* Mode 7, Pull-Up, RxActive */
+	P9_92_gpio_pd_pin: pinmux_P9_92_gpio_pd_pin {
+		pinctrl-single,pins = <0x1a0  0x27>; };     /* Mode 7, Pull-Down, RxActive */
+	P9_92_qep_pin: pinmux_P9_92_qep_pin {
+		pinctrl-single,pins = <0x1a0  0x21>; };     /* Mode 1, Pull-Down, RxActive */
+	P9_92_pruout_pin: pinmux_P9_92_pruout_pin {
+		pinctrl-single,pins = <0x1a0  0x25>; };     /* Mode 5, Pull-Down, RxActive */
+	P9_92_pruin_pin: pinmux_P9_92_pruin_pin {
+		pinctrl-single,pins = <0x1a0  0x26>; };     /* Mode 6, Pull-Down, RxActive */
+
+	/* P9_43                GND     */
+	/* P9_44                GND     */
+	/* P9_45                GND     */
+	/* P9_46                GND     */
+};
+
+/**********************************************************************/
+/* Pin Multiplex Helpers                                              */
+/*                                                                    */
+/* These provide userspace runtime pin configuration for the          */
+/* BeagleBone cape expansion headers                                  */
+/**********************************************************************/
+
+&ocp {
+	/************************/
+	/* P8 Header            */
+	/************************/
+
+	P8_07_pinmux {
+		compatible = "bone-pinmux-helper";
+		status = "okay";
+
+		pinctrl-names = "default", "gpio", "gpio_pu", "gpio_pd", "timer";
+		pinctrl-0 = <&P8_07_default_pin>;
+		pinctrl-1 = <&P8_07_gpio_pin>;
+		pinctrl-2 = <&P8_07_gpio_pu_pin>;
+		pinctrl-3 = <&P8_07_gpio_pd_pin>;
+		pinctrl-4 = <&P8_07_timer_pin>;
+	};
+
+	P8_08_pinmux {
+		compatible = "bone-pinmux-helper";
+		status = "okay";
+		pinctrl-names = "default", "gpio", "gpio_pu", "gpio_pd", "timer";
+		pinctrl-0 = <&P8_08_default_pin>;
+		pinctrl-1 = <&P8_08_gpio_pin>;
+		pinctrl-2 = <&P8_08_gpio_pu_pin>;
+		pinctrl-3 = <&P8_08_gpio_pd_pin>;
+		pinctrl-4 = <&P8_08_timer_pin>;
+	};
+
+	P8_09_pinmux {
+		compatible = "bone-pinmux-helper";
+		status = "okay";
+		pinctrl-names = "default", "gpio", "gpio_pu", "gpio_pd", "timer";
+		pinctrl-0 = <&P8_09_default_pin>;
+		pinctrl-1 = <&P8_09_gpio_pin>;
+		pinctrl-2 = <&P8_09_gpio_pu_pin>;
+		pinctrl-3 = <&P8_09_gpio_pd_pin>;
+		pinctrl-4 = <&P8_09_timer_pin>;
+	};
+
+	P8_10_pinmux {
+		compatible = "bone-pinmux-helper";
+		status = "okay";
+		pinctrl-names = "default", "gpio", "gpio_pu", "gpio_pd", "timer";
+		pinctrl-0 = <&P8_10_default_pin>;
+		pinctrl-1 = <&P8_10_gpio_pin>;
+		pinctrl-2 = <&P8_10_gpio_pu_pin>;
+		pinctrl-3 = <&P8_10_gpio_pd_pin>;
+		pinctrl-4 = <&P8_10_timer_pin>;
+	};
+
+	P8_11_pinmux {
+		compatible = "bone-pinmux-helper";
+		status = "okay";
+		pinctrl-names = "default", "gpio", "gpio_pu", "gpio_pd", "pruout", "qep";
+		pinctrl-0 = <&P8_11_default_pin>;
+		pinctrl-1 = <&P8_11_gpio_pin>;
+		pinctrl-2 = <&P8_11_gpio_pu_pin>;
+		pinctrl-3 = <&P8_11_gpio_pd_pin>;
+		pinctrl-4 = <&P8_11_pruout_pin>;
+		pinctrl-5 = <&P8_11_qep_pin>;
+	};
+
+	P8_12_pinmux {
+		compatible = "bone-pinmux-helper";
+		status = "okay";
+		pinctrl-names = "default", "gpio", "gpio_pu", "gpio_pd", "pruout", "qep";
+		pinctrl-0 = <&P8_12_default_pin>;
+		pinctrl-1 = <&P8_12_gpio_pin>;
+		pinctrl-2 = <&P8_12_gpio_pu_pin>;
+		pinctrl-3 = <&P8_12_gpio_pd_pin>;
+		pinctrl-4 = <&P8_12_pruout_pin>;
+		pinctrl-5 = <&P8_12_qep_pin>;
+	};
+
+	P8_13_pinmux {
+		compatible = "bone-pinmux-helper";
+		status = "okay";
+		pinctrl-names = "default", "gpio", "gpio_pu", "gpio_pd", "pwm";
+		pinctrl-0 = <&P8_13_default_pin>;
+		pinctrl-1 = <&P8_13_gpio_pin>;
+		pinctrl-2 = <&P8_13_gpio_pu_pin>;
+		pinctrl-3 = <&P8_13_gpio_pd_pin>;
+		pinctrl-4 = <&P8_13_pwm_pin>;
+	};
+
+	P8_14_pinmux {
+		compatible = "bone-pinmux-helper";
+		status = "okay";
+		pinctrl-names = "default", "gpio", "gpio_pu", "gpio_pd", "pwm";
+		pinctrl-0 = <&P8_14_default_pin>;
+		pinctrl-1 = <&P8_14_gpio_pin>;
+		pinctrl-2 = <&P8_14_gpio_pu_pin>;
+		pinctrl-3 = <&P8_14_gpio_pd_pin>;
+		pinctrl-4 = <&P8_14_pwm_pin>;
+	};
+
+	P8_15_pinmux {
+		compatible = "bone-pinmux-helper";
+		status = "okay";
+		pinctrl-names = "default", "gpio", "gpio_pu", "gpio_pd", "pruin", "qep";
+		pinctrl-0 = <&P8_15_default_pin>;
+		pinctrl-1 = <&P8_15_gpio_pin>;
+		pinctrl-2 = <&P8_15_gpio_pu_pin>;
+		pinctrl-3 = <&P8_15_gpio_pd_pin>;
+		pinctrl-4 = <&P8_15_pruin_pin>;
+		pinctrl-5 = <&P8_15_qep_pin>;
+	};
+
+	P8_16_pinmux {
+		compatible = "bone-pinmux-helper";
+		status = "okay";
+		pinctrl-names = "default", "gpio", "gpio_pu", "gpio_pd", "pruin", "qep";
+		pinctrl-0 = <&P8_16_default_pin>;
+		pinctrl-1 = <&P8_16_gpio_pin>;
+		pinctrl-2 = <&P8_16_gpio_pu_pin>;
+		pinctrl-3 = <&P8_16_gpio_pd_pin>;
+		pinctrl-4 = <&P8_16_pruin_pin>;
+		pinctrl-5 = <&P8_16_qep_pin>;
+	};
+
+	P8_17_pinmux {
+		compatible = "bone-pinmux-helper";
+		status = "okay";
+		pinctrl-names = "default", "gpio", "gpio_pu", "gpio_pd", "pwm";
+		pinctrl-0 = <&P8_17_default_pin>;
+		pinctrl-1 = <&P8_17_gpio_pin>;
+		pinctrl-2 = <&P8_17_gpio_pu_pin>;
+		pinctrl-3 = <&P8_17_gpio_pd_pin>;
+		pinctrl-4 = <&P8_17_pwm_pin>;
+	};
+
+	P8_18_pinmux {
+		compatible = "bone-pinmux-helper";
+		status = "okay";
+		pinctrl-names = "default", "gpio";
+		pinctrl-0 = <&P8_18_default_pin>;
+		pinctrl-1 = <&P8_18_gpio_pin>;
+		pinctrl-2 = <&P8_18_gpio_pu_pin>;
+		pinctrl-3 = <&P8_18_gpio_pd_pin>;
+	};
+
+	P8_19_pinmux {
+		compatible = "bone-pinmux-helper";
+		status = "okay";
+		pinctrl-names = "default", "gpio", "gpio_pu", "gpio_pd", "pwm";
+		pinctrl-0 = <&P8_19_default_pin>;
+		pinctrl-1 = <&P8_19_gpio_pin>;
+		pinctrl-2 = <&P8_19_gpio_pu_pin>;
+		pinctrl-3 = <&P8_19_gpio_pd_pin>;
+		pinctrl-4 = <&P8_19_pwm_pin>;
+	};
+
+	P8_26_pinmux {
+		compatible = "bone-pinmux-helper";
+		status = "okay";
+		pinctrl-names = "default", "gpio";
+		pinctrl-0 = <&P8_26_default_pin>;
+		pinctrl-1 = <&P8_26_gpio_pin>;
+		pinctrl-2 = <&P8_26_gpio_pu_pin>;
+		pinctrl-3 = <&P8_26_gpio_pd_pin>;
+	};
+
+	P8_27_pinmux {
+		compatible = "bone-pinmux-helper";
+		status = "okay";
+		pinctrl-names = "default", "gpio", "gpio_pu", "gpio_pd", "pruout", "pruin", "hdmi";
+		pinctrl-0 = <&P8_27_default_pin>;
+		pinctrl-1 = <&P8_27_gpio_pin>;
+		pinctrl-2 = <&P8_27_gpio_pu_pin>;
+		pinctrl-3 = <&P8_27_gpio_pd_pin>;
+		pinctrl-4 = <&P8_27_pruout_pin>;
+		pinctrl-5 = <&P8_27_pruin_pin>;
+		pinctrl-6 = <&P8_27_hdmi_pin>;
+	};
+
+	P8_28_pinmux {
+		compatible = "bone-pinmux-helper";
+		status = "okay";
+		pinctrl-names = "default", "gpio", "gpio_pu", "gpio_pd", "pruout", "pruin", "hdmi";
+		pinctrl-0 = <&P8_28_default_pin>;
+		pinctrl-1 = <&P8_28_gpio_pin>;
+		pinctrl-2 = <&P8_28_gpio_pu_pin>;
+		pinctrl-3 = <&P8_28_gpio_pd_pin>;
+		pinctrl-4 = <&P8_28_pruout_pin>;
+		pinctrl-5 = <&P8_28_pruin_pin>;
+		pinctrl-6 = <&P8_28_hdmi_pin>;
+	};
+
+	P8_29_pinmux {
+		compatible = "bone-pinmux-helper";
+		status = "okay";
+		pinctrl-names = "default", "gpio", "gpio_pu", "gpio_pd", "pruout", "pruin", "hdmi";
+		pinctrl-0 = <&P8_29_default_pin>;
+		pinctrl-1 = <&P8_29_gpio_pin>;
+		pinctrl-2 = <&P8_29_gpio_pu_pin>;
+		pinctrl-3 = <&P8_29_gpio_pd_pin>;
+		pinctrl-4 = <&P8_29_pruout_pin>;
+		pinctrl-5 = <&P8_29_pruin_pin>;
+		pinctrl-6 = <&P8_29_hdmi_pin>;
+	};
+
+	P8_30_pinmux {
+		compatible = "bone-pinmux-helper";
+		status = "okay";
+		pinctrl-names = "default", "gpio", "gpio_pu", "gpio_pd", "pruout", "pruin", "hdmi";
+		pinctrl-0 = <&P8_30_default_pin>;
+		pinctrl-1 = <&P8_30_gpio_pin>;
+		pinctrl-2 = <&P8_30_gpio_pu_pin>;
+		pinctrl-3 = <&P8_30_gpio_pd_pin>;
+		pinctrl-4 = <&P8_30_pruout_pin>;
+		pinctrl-5 = <&P8_30_pruin_pin>;
+		pinctrl-6 = <&P8_30_hdmi_pin>;
+	};
+
+	P8_31_pinmux {
+		compatible = "bone-pinmux-helper";
+		status = "okay";
+		pinctrl-names = "default", "gpio", "gpio_pu", "gpio_pd","uart", "hdmi";
+		pinctrl-0 = <&P8_31_default_pin>;
+		pinctrl-1 = <&P8_31_gpio_pin>;
+		pinctrl-2 = <&P8_31_gpio_pu_pin>;
+		pinctrl-3 = <&P8_31_gpio_pd_pin>;
+		pinctrl-4 = <&P8_31_uart_pin>;
+		pinctrl-5 = <&P8_31_hdmi_pin>;
+	};
+
+	P8_32_pinmux {
+		compatible = "bone-pinmux-helper";
+		status = "okay";
+		pinctrl-names = "default", "gpio", "gpio_pu", "gpio_pd", "hdmi";
+		pinctrl-0 = <&P8_32_default_pin>;
+		pinctrl-1 = <&P8_32_gpio_pin>;
+		pinctrl-2 = <&P8_32_gpio_pu_pin>;
+		pinctrl-3 = <&P8_32_gpio_pd_pin>;
+		pinctrl-4 = <&P8_32_hdmi_pin>;
+	};
+
+	P8_33_pinmux {
+		compatible = "bone-pinmux-helper";
+		status = "okay";
+		pinctrl-names = "default", "gpio", "gpio_pu", "gpio_pd", "hdmi";
+		pinctrl-0 = <&P8_33_default_pin>;
+		pinctrl-1 = <&P8_33_gpio_pin>;
+		pinctrl-2 = <&P8_33_gpio_pu_pin>;
+		pinctrl-3 = <&P8_33_gpio_pd_pin>;
+		pinctrl-4 = <&P8_33_hdmi_pin>;
+	};
+
+	P8_34_pinmux {
+		compatible = "bone-pinmux-helper";
+		status = "okay";
+		pinctrl-names = "default", "gpio", "gpio_pu", "gpio_pd","pwm", "hdmi";
+		pinctrl-0 = <&P8_34_default_pin>;
+		pinctrl-1 = <&P8_34_gpio_pin>;
+		pinctrl-2 = <&P8_34_gpio_pu_pin>;
+		pinctrl-3 = <&P8_34_gpio_pd_pin>;
+		pinctrl-4 = <&P8_34_pwm_pin>;
+		pinctrl-5 = <&P8_34_hdmi_pin>;
+	};
+
+	P8_35_pinmux {
+		compatible = "bone-pinmux-helper";
+		status = "okay";
+		pinctrl-names = "default", "gpio", "gpio_pu", "gpio_pd", "hdmi";
+		pinctrl-0 = <&P8_35_default_pin>;
+		pinctrl-1 = <&P8_35_gpio_pin>;
+		pinctrl-2 = <&P8_35_gpio_pu_pin>;
+		pinctrl-3 = <&P8_35_gpio_pd_pin>;
+		pinctrl-4 = <&P8_35_hdmi_pin>;
+	};
+
+	P8_36_pinmux {
+		compatible = "bone-pinmux-helper";
+		status = "okay";
+		pinctrl-names = "default", "gpio", "gpio_pu", "gpio_pd","pwm", "hdmi";
+		pinctrl-0 = <&P8_36_default_pin>;
+		pinctrl-1 = <&P8_36_gpio_pin>;
+		pinctrl-2 = <&P8_36_gpio_pu_pin>;
+		pinctrl-3 = <&P8_36_gpio_pd_pin>;
+		pinctrl-4 = <&P8_36_pwm_pin>;
+		pinctrl-5 = <&P8_36_hdmi_pin>;
+	};
+
+	P8_37_pinmux {
+		compatible = "bone-pinmux-helper";
+		status = "okay";
+		pinctrl-names = "default", "gpio", "gpio_pu", "gpio_pd","uart","pwm", "hdmi";
+		pinctrl-0 = <&P8_37_default_pin>;
+		pinctrl-1 = <&P8_37_gpio_pin>;
+		pinctrl-2 = <&P8_37_gpio_pu_pin>;
+		pinctrl-3 = <&P8_37_gpio_pd_pin>;
+		pinctrl-4 = <&P8_37_uart_pin>;
+		pinctrl-5 = <&P8_37_pwm_pin>;
+		pinctrl-6 = <&P8_37_hdmi_pin>;
+	};
+
+	P8_38_pinmux {
+		compatible = "bone-pinmux-helper";
+		status = "okay";
+		pinctrl-names = "default", "gpio", "gpio_pu", "gpio_pd","uart","pwm", "hdmi";
+		pinctrl-0 = <&P8_38_default_pin>;
+		pinctrl-1 = <&P8_38_gpio_pin>;
+		pinctrl-2 = <&P8_38_gpio_pu_pin>;
+		pinctrl-3 = <&P8_38_gpio_pd_pin>;
+		pinctrl-4 = <&P8_38_uart_pin>;
+		pinctrl-5 = <&P8_38_pwm_pin>;
+		pinctrl-6 = <&P8_38_hdmi_pin>;
+	};
+
+	P8_39_pinmux {
+		compatible = "bone-pinmux-helper";
+		status = "okay";
+		pinctrl-names = "default", "gpio", "gpio_pu", "gpio_pd", "pruout", "pruin", "hdmi";
+		pinctrl-0 = <&P8_39_default_pin>;
+		pinctrl-1 = <&P8_39_gpio_pin>;
+		pinctrl-2 = <&P8_39_gpio_pu_pin>;
+		pinctrl-3 = <&P8_39_gpio_pd_pin>;
+		pinctrl-4 = <&P8_39_pruout_pin>;
+		pinctrl-5 = <&P8_39_pruin_pin>;
+		pinctrl-6 = <&P8_39_hdmi_pin>;
+	};
+
+	P8_40_pinmux {
+		compatible = "bone-pinmux-helper";
+		status = "okay";
+		pinctrl-names = "default", "gpio", "gpio_pu", "gpio_pd", "pruout", "pruin", "hdmi";
+		pinctrl-0 = <&P8_40_default_pin>;
+		pinctrl-1 = <&P8_40_gpio_pin>;
+		pinctrl-2 = <&P8_40_gpio_pu_pin>;
+		pinctrl-3 = <&P8_40_gpio_pd_pin>;
+		pinctrl-4 = <&P8_40_pruout_pin>;
+		pinctrl-5 = <&P8_40_pruin_pin>;
+		pinctrl-6 = <&P8_40_hdmi_pin>;
+	};
+
+	P8_41_pinmux {
+		compatible = "bone-pinmux-helper";
+		status = "okay";
+		pinctrl-names = "default", "gpio", "gpio_pu", "gpio_pd", "pruout", "pruin", "hdmi";
+		pinctrl-0 = <&P8_41_default_pin>;
+		pinctrl-1 = <&P8_41_gpio_pin>;
+		pinctrl-2 = <&P8_41_gpio_pu_pin>;
+		pinctrl-3 = <&P8_41_gpio_pd_pin>;
+		pinctrl-4 = <&P8_41_pruout_pin>;
+		pinctrl-5 = <&P8_41_pruin_pin>;
+		pinctrl-6 = <&P8_41_hdmi_pin>;
+	};
+
+	P8_42_pinmux {
+		compatible = "bone-pinmux-helper";
+		status = "okay";
+		pinctrl-names = "default", "gpio", "gpio_pu", "gpio_pd", "pruout", "pruin", "hdmi";
+		pinctrl-0 = <&P8_42_default_pin>;
+		pinctrl-1 = <&P8_42_gpio_pin>;
+		pinctrl-2 = <&P8_42_gpio_pu_pin>;
+		pinctrl-3 = <&P8_42_gpio_pd_pin>;
+		pinctrl-4 = <&P8_42_pruout_pin>;
+		pinctrl-5 = <&P8_42_pruin_pin>;
+		pinctrl-6 = <&P8_42_hdmi_pin>;
+	};
+
+	P8_43_pinmux {
+		compatible = "bone-pinmux-helper";
+		status = "okay";
+		pinctrl-names = "default", "gpio", "gpio_pu", "gpio_pd", "pruout", "pruin","pwm", "hdmi";
+		pinctrl-0 = <&P8_43_default_pin>;
+		pinctrl-1 = <&P8_43_gpio_pin>;
+		pinctrl-2 = <&P8_43_gpio_pu_pin>;
+		pinctrl-3 = <&P8_43_gpio_pd_pin>;
+		pinctrl-4 = <&P8_43_pruout_pin>;
+		pinctrl-5 = <&P8_43_pruin_pin>;
+		pinctrl-6 = <&P8_43_pwm_pin>;
+		pinctrl-7 = <&P8_43_hdmi_pin>;
+	};
+
+	P8_44_pinmux {
+		compatible = "bone-pinmux-helper";
+		status = "okay";
+		pinctrl-names = "default", "gpio", "gpio_pu", "gpio_pd", "pruout", "pruin","pwm", "hdmi";
+		pinctrl-0 = <&P8_44_default_pin>;
+		pinctrl-1 = <&P8_44_gpio_pin>;
+		pinctrl-2 = <&P8_44_gpio_pu_pin>;
+		pinctrl-3 = <&P8_44_gpio_pd_pin>;
+		pinctrl-4 = <&P8_44_pruout_pin>;
+		pinctrl-5 = <&P8_44_pruin_pin>;
+		pinctrl-6 = <&P8_44_pwm_pin>;
+		pinctrl-7 = <&P8_44_hdmi_pin>;
+	};
+
+	P8_45_pinmux {
+		compatible = "bone-pinmux-helper";
+		status = "okay";
+		pinctrl-names = "default", "gpio", "gpio_pu", "gpio_pd", "pruout", "pruin","pwm", "hdmi";
+		pinctrl-0 = <&P8_45_default_pin>;
+		pinctrl-1 = <&P8_45_gpio_pin>;
+		pinctrl-2 = <&P8_45_gpio_pu_pin>;
+		pinctrl-3 = <&P8_45_gpio_pd_pin>;
+		pinctrl-4 = <&P8_45_pruout_pin>;
+		pinctrl-5 = <&P8_45_pruin_pin>;
+		pinctrl-6 = <&P8_45_pwm_pin>;
+		pinctrl-7 = <&P8_45_hdmi_pin>;
+	};
+
+	P8_46_pinmux {
+		compatible = "bone-pinmux-helper";
+		status = "okay";
+		pinctrl-names = "default", "gpio", "gpio_pu", "gpio_pd", "pruout", "pruin","pwm", "hdmi";
+		pinctrl-0 = <&P8_46_default_pin>;
+		pinctrl-1 = <&P8_46_gpio_pin>;
+		pinctrl-2 = <&P8_46_gpio_pu_pin>;
+		pinctrl-3 = <&P8_46_gpio_pd_pin>;
+		pinctrl-4 = <&P8_46_pruout_pin>;
+		pinctrl-5 = <&P8_46_pruin_pin>;
+		pinctrl-6 = <&P8_46_pwm_pin>;
+		pinctrl-7 = <&P8_46_hdmi_pin>;
+	};
+
+	/************************/
+	/* P9 Header	        */
+	/************************/
+
+	P9_11_pinmux {
+		compatible = "bone-pinmux-helper";
+		status = "okay";
+		pinctrl-names = "default", "gpio", "gpio_pu", "gpio_pd", "uart";
+		pinctrl-0 = <&P9_11_default_pin>;
+		pinctrl-1 = <&P9_11_gpio_pin>;
+		pinctrl-2 = <&P9_11_gpio_pu_pin>;
+		pinctrl-3 = <&P9_11_gpio_pd_pin>;
+		pinctrl-4 = <&P9_11_uart_pin>;
+	};
+
+	P9_12_pinmux {
+		compatible = "bone-pinmux-helper";
+		status = "okay";
+		pinctrl-names = "default", "gpio";
+		pinctrl-0 = <&P9_12_default_pin>;
+		pinctrl-1 = <&P9_12_gpio_pin>;
+		pinctrl-2 = <&P9_12_gpio_pu_pin>;
+		pinctrl-3 = <&P9_12_gpio_pd_pin>;
+	};
+
+	P9_13_pinmux {
+		compatible = "bone-pinmux-helper";
+		status = "okay";
+		pinctrl-names = "default", "gpio", "gpio_pu", "gpio_pd", "uart";
+		pinctrl-0 = <&P9_13_default_pin>;
+		pinctrl-1 = <&P9_13_gpio_pin>;
+		pinctrl-2 = <&P9_13_gpio_pu_pin>;
+		pinctrl-3 = <&P9_13_gpio_pd_pin>;
+		pinctrl-4 = <&P9_13_uart_pin>;
+	};
+
+	P9_14_pinmux {
+		compatible = "bone-pinmux-helper";
+		status = "okay";
+		pinctrl-names = "default", "gpio", "gpio_pu", "gpio_pd", "pwm";
+		pinctrl-0 = <&P9_14_default_pin>;
+		pinctrl-1 = <&P9_14_gpio_pin>;
+		pinctrl-2 = <&P9_14_gpio_pu_pin>;
+		pinctrl-3 = <&P9_14_gpio_pd_pin>;
+		pinctrl-4 = <&P9_14_pwm_pin>;
+	};
+
+	P9_15_pinmux {
+		compatible = "bone-pinmux-helper";
+		status = "okay";
+		pinctrl-names = "default", "gpio", "gpio_pu", "gpio_pd", "pwm";
+		pinctrl-0 = <&P9_15_default_pin>;
+		pinctrl-1 = <&P9_15_gpio_pin>;
+		pinctrl-2 = <&P9_15_gpio_pu_pin>;
+		pinctrl-3 = <&P9_15_gpio_pd_pin>;
+		pinctrl-4 = <&P9_15_pwm_pin>;
+	};
+
+	P9_16_pinmux {
+		compatible = "bone-pinmux-helper";
+		status = "okay";
+		pinctrl-names = "default", "gpio", "gpio_pu", "gpio_pd", "pwm";
+		pinctrl-0 = <&P9_16_default_pin>;
+		pinctrl-1 = <&P9_16_gpio_pin>;
+		pinctrl-2 = <&P9_16_gpio_pu_pin>;
+		pinctrl-3 = <&P9_16_gpio_pd_pin>;
+		pinctrl-4 = <&P9_16_pwm_pin>;
+	};
+
+	P9_17_pinmux {
+		compatible = "bone-pinmux-helper";
+		status = "okay";
+		pinctrl-names = "default", "gpio", "gpio_pu", "gpio_pd", "spi", "i2c", "pwm";
+		pinctrl-0 = <&P9_17_default_pin>;
+		pinctrl-1 = <&P9_17_gpio_pin>;
+		pinctrl-2 = <&P9_17_gpio_pu_pin>;
+		pinctrl-3 = <&P9_17_gpio_pd_pin>;
+		pinctrl-4 = <&P9_17_spi_pin>;
+		pinctrl-5 = <&P9_17_i2c_pin>;
+		pinctrl-6 = <&P9_17_pwm_pin>;
+	};
+
+	P9_18_pinmux {
+		compatible = "bone-pinmux-helper";
+		status = "okay";
+		pinctrl-names = "default", "gpio", "gpio_pu", "gpio_pd", "spi", "i2c", "pwm";
+		pinctrl-0 = <&P9_18_default_pin>;
+		pinctrl-1 = <&P9_18_gpio_pin>;
+		pinctrl-2 = <&P9_18_gpio_pu_pin>;
+		pinctrl-3 = <&P9_18_gpio_pd_pin>;
+		pinctrl-4 = <&P9_18_spi_pin>;
+		pinctrl-5 = <&P9_18_i2c_pin>;
+		pinctrl-6 = <&P9_18_pwm_pin>;
+	};
+
+	P9_19_pinmux {
+		compatible = "bone-pinmux-helper";
+		status = "okay";
+		pinctrl-names = "default", "gpio", "gpio_pu", "gpio_pd", "can", "i2c";
+		pinctrl-0 = <&P9_19_default_pin>;
+		pinctrl-1 = <&P9_19_gpio_pin>;
+		pinctrl-2 = <&P9_19_gpio_pu_pin>;
+		pinctrl-3 = <&P9_19_gpio_pd_pin>;
+		pinctrl-4 = <&P9_19_can_pin>;
+		pinctrl-5 = <&P9_19_i2c_pin>;
+	};
+
+	P9_20_pinmux {
+		compatible = "bone-pinmux-helper";
+		status = "okay";
+		pinctrl-names = "default", "gpio", "gpio_pu", "gpio_pd", "can", "i2c";
+		pinctrl-0 = <&P9_20_default_pin>;
+		pinctrl-1 = <&P9_20_gpio_pin>;
+		pinctrl-2 = <&P9_20_gpio_pu_pin>;
+		pinctrl-3 = <&P9_20_gpio_pd_pin>;
+		pinctrl-4 = <&P9_20_can_pin>;
+		pinctrl-5 = <&P9_20_i2c_pin>;
+	};
+
+	P9_21_pinmux {
+		compatible = "bone-pinmux-helper";
+		status = "okay";
+		pinctrl-names = "default", "gpio", "gpio_pu", "gpio_pd", "spi", "uart", "i2c", "pwm";
+		pinctrl-0 = <&P9_21_default_pin>;
+		pinctrl-1 = <&P9_21_gpio_pin>;
+		pinctrl-2 = <&P9_21_gpio_pu_pin>;
+		pinctrl-3 = <&P9_21_gpio_pd_pin>;
+		pinctrl-4 = <&P9_21_spi_pin>;
+		pinctrl-5 = <&P9_21_uart_pin>;
+		pinctrl-6 = <&P9_21_i2c_pin>;
+		pinctrl-7 = <&P9_21_pwm_pin>;
+	};
+
+	P9_22_pinmux {
+		compatible = "bone-pinmux-helper";
+		status = "okay";
+		pinctrl-names = "default", "gpio", "gpio_pu", "gpio_pd", "spi", "uart", "i2c", "pwm";
+		pinctrl-0 = <&P9_22_default_pin>;
+		pinctrl-1 = <&P9_22_gpio_pin>;
+		pinctrl-2 = <&P9_22_gpio_pu_pin>;
+		pinctrl-3 = <&P9_22_gpio_pd_pin>;
+		pinctrl-4 = <&P9_22_spi_pin>;
+		pinctrl-5 = <&P9_22_uart_pin>;
+		pinctrl-6 = <&P9_22_i2c_pin>;
+		pinctrl-7 = <&P9_22_pwm_pin>;
+	};
+
+	P9_23_pinmux {
+		compatible = "bone-pinmux-helper";
+		status = "okay";
+		pinctrl-names = "default", "gpio", "gpio_pu", "gpio_pd", "pwm";
+		pinctrl-0 = <&P9_23_default_pin>;
+		pinctrl-1 = <&P9_23_gpio_pin>;
+		pinctrl-2 = <&P9_23_gpio_pu_pin>;
+		pinctrl-3 = <&P9_23_gpio_pd_pin>;
+		pinctrl-4 = <&P9_23_pwm_pin>;
+	};
+
+	P9_24_pinmux {
+		compatible = "bone-pinmux-helper";
+		status = "okay";
+		pinctrl-names = "default", "gpio", "gpio_pu", "gpio_pd", "uart", "can", "i2c", "pruin";
+		pinctrl-0 = <&P9_24_default_pin>;
+		pinctrl-1 = <&P9_24_gpio_pin>;
+		pinctrl-2 = <&P9_24_gpio_pu_pin>;
+		pinctrl-3 = <&P9_24_gpio_pd_pin>;
+		pinctrl-4 = <&P9_24_uart_pin>;
+		pinctrl-5 = <&P9_24_can_pin>;
+		pinctrl-6 = <&P9_24_i2c_pin>;
+		pinctrl-7 = <&P9_24_pruin_pin>;
+	};
+
+	P9_25_pinmux {
+		compatible = "bone-pinmux-helper";
+		status = "okay";
+		pinctrl-names = "default", "gpio", "gpio_pu", "gpio_pd", "qep", "pruout", "pruin", "audio";
+		pinctrl-0 = <&P9_25_default_pin>;
+		pinctrl-1 = <&P9_25_gpio_pin>;
+		pinctrl-2 = <&P9_25_gpio_pu_pin>;
+		pinctrl-3 = <&P9_25_gpio_pd_pin>;
+		pinctrl-4 = <&P9_25_qep_pin>;
+		pinctrl-5 = <&P9_25_pruout_pin>;
+		pinctrl-6 = <&P9_25_pruin_pin>;
+		pinctrl-7 = <&P9_25_audio_pin>;
+	};
+
+	P9_26_pinmux {
+		compatible = "bone-pinmux-helper";
+		status = "okay";
+		pinctrl-names = "default", "gpio", "gpio_pu", "gpio_pd", "uart", "can", "i2c", "pruin";
+		pinctrl-0 = <&P9_26_default_pin>;
+		pinctrl-1 = <&P9_26_gpio_pin>;
+		pinctrl-2 = <&P9_26_gpio_pu_pin>;
+		pinctrl-3 = <&P9_26_gpio_pd_pin>;
+		pinctrl-4 = <&P9_26_uart_pin>;
+		pinctrl-5 = <&P9_26_can_pin>;
+		pinctrl-6 = <&P9_26_i2c_pin>;
+		pinctrl-7 = <&P9_26_pruin_pin>;
+	};
+
+	P9_27_pinmux {
+		compatible = "bone-pinmux-helper";
+		status = "okay";
+		pinctrl-names = "default", "gpio", "gpio_pu", "gpio_pd", "qep", "pruout", "pruin";
+		pinctrl-0 = <&P9_27_default_pin>;
+		pinctrl-1 = <&P9_27_gpio_pin>;
+		pinctrl-2 = <&P9_27_gpio_pu_pin>;
+		pinctrl-3 = <&P9_27_gpio_pd_pin>;
+		pinctrl-4 = <&P9_27_qep_pin>;
+		pinctrl-5 = <&P9_27_pruout_pin>;
+		pinctrl-6 = <&P9_27_pruin_pin>;
+	};
+
+	P9_28_pinmux {
+		compatible = "bone-pinmux-helper";
+		status = "okay";
+		pinctrl-names = "default", "gpio", "gpio_pu", "gpio_pd", "pwm", "spi", "pwm2", "pruout", "pruin", "audio";
+		pinctrl-0 = <&P9_28_default_pin>;
+		pinctrl-1 = <&P9_28_gpio_pin>;
+		pinctrl-2 = <&P9_28_gpio_pu_pin>;
+		pinctrl-3 = <&P9_28_gpio_pd_pin>;
+		pinctrl-4 = <&P9_28_pwm_pin>;
+		pinctrl-5 = <&P9_28_spi_pin>;
+		pinctrl-6 = <&P9_28_pwm2_pin>;
+		pinctrl-7 = <&P9_28_pruout_pin>;
+		pinctrl-8 = <&P9_28_pruin_pin>;
+		pinctrl-9 = <&P9_28_audio_pin>;
+	};
+
+	P9_29_pinmux {
+		compatible = "bone-pinmux-helper";
+		status = "okay";
+		pinctrl-names = "default", "gpio", "gpio_pu", "gpio_pd", "pwm", "spi", "pruout", "pruin", "audio";
+		pinctrl-0 = <&P9_29_default_pin>;
+		pinctrl-1 = <&P9_29_gpio_pin>;
+		pinctrl-2 = <&P9_29_gpio_pu_pin>;
+		pinctrl-3 = <&P9_29_gpio_pd_pin>;
+		pinctrl-4 = <&P9_29_pwm_pin>;
+		pinctrl-5 = <&P9_29_spi_pin>;
+		pinctrl-6 = <&P9_29_pruout_pin>;
+		pinctrl-7 = <&P9_29_pruin_pin>;
+		pinctrl-8 = <&P9_29_audio_pin>;
+	};
+
+	P9_30_pinmux {
+		compatible = "bone-pinmux-helper";
+		status = "okay";
+		pinctrl-names = "default", "gpio", "gpio_pu", "gpio_pd", "pwm", "spi", "pruout", "pruin";
+		pinctrl-0 = <&P9_30_default_pin>;
+		pinctrl-1 = <&P9_30_gpio_pin>;
+		pinctrl-2 = <&P9_30_gpio_pu_pin>;
+		pinctrl-3 = <&P9_30_gpio_pd_pin>;
+		pinctrl-4 = <&P9_30_pwm_pin>;
+		pinctrl-5 = <&P9_30_spi_pin>;
+		pinctrl-6 = <&P9_30_pruout_pin>;
+		pinctrl-7 = <&P9_30_pruin_pin>;
+	};
+
+	P9_31_pinmux {
+		compatible = "bone-pinmux-helper";
+		status = "okay";
+		pinctrl-names = "default", "gpio", "gpio_pu", "gpio_pd", "pwm", "spi", "pruout", "pruin", "audio";
+		pinctrl-0 = <&P9_31_default_pin>;
+		pinctrl-1 = <&P9_31_gpio_pin>;
+		pinctrl-2 = <&P9_31_gpio_pu_pin>;
+		pinctrl-3 = <&P9_31_gpio_pd_pin>;
+		pinctrl-4 = <&P9_31_pwm_pin>;
+		pinctrl-5 = <&P9_31_spi_pin>;
+		pinctrl-6 = <&P9_31_pruout_pin>;
+		pinctrl-7 = <&P9_31_pruin_pin>;
+		pinctrl-8 = <&P9_31_audio_pin>;
+	};
+
+	P9_41_pinmux {
+		compatible = "bone-pinmux-helper";
+		status = "okay";
+		pinctrl-names = "default", "gpio", "gpio_pu", "gpio_pd", "timer", "pruin";
+		pinctrl-0 = <&P9_41_default_pin>;
+		pinctrl-1 = <&P9_41_gpio_pin>;
+		pinctrl-2 = <&P9_41_gpio_pu_pin>;
+		pinctrl-3 = <&P9_41_gpio_pd_pin>;
+		pinctrl-4 = <&P9_41_timer_pin>;
+		pinctrl-5 = <&P9_41_pruin_pin>;
+	};
+
+	P9_91_pinmux {
+		compatible = "bone-pinmux-helper";
+		status = "okay";
+		pinctrl-names = "default", "gpio", "gpio_pu", "gpio_pd", "qep", "pruout", "pruin";
+		pinctrl-0 = <&P9_91_default_pin>;
+		pinctrl-1 = <&P9_91_gpio_pin>;
+		pinctrl-2 = <&P9_91_gpio_pu_pin>;
+		pinctrl-3 = <&P9_91_gpio_pd_pin>;
+		pinctrl-4 = <&P9_91_qep_pin>;
+		pinctrl-5 = <&P9_91_pruout_pin>;
+		pinctrl-6 = <&P9_91_pruin_pin>;
+	};
+
+	P9_42_pinmux {
+		compatible = "bone-pinmux-helper";
+		status = "okay";
+		pinctrl-names = "default", "gpio", "gpio_pu", "gpio_pd", "pwm", "uart", "spics", "spiclk";
+		pinctrl-0 = <&P9_42_default_pin>;
+		pinctrl-1 = <&P9_42_gpio_pin>;
+		pinctrl-2 = <&P9_42_gpio_pu_pin>;
+		pinctrl-3 = <&P9_42_gpio_pd_pin>;
+		pinctrl-4 = <&P9_42_pwm_pin>;
+		pinctrl-5 = <&P9_42_uart_pin>;
+		pinctrl-6 = <&P9_42_spics_pin>;
+		pinctrl-7 = <&P9_42_spiclk_pin>;
+	};
+
+	P9_92_pinmux {
+		compatible = "bone-pinmux-helper";
+		status = "okay";
+		pinctrl-names = "default", "gpio", "gpio_pu", "gpio_pd", "qep", "pruout", "pruin";
+		pinctrl-0 = <&P9_92_default_pin>;
+		pinctrl-1 = <&P9_92_gpio_pin>;
+		pinctrl-2 = <&P9_92_gpio_pu_pin>;
+		pinctrl-3 = <&P9_92_gpio_pd_pin>;
+		pinctrl-4 = <&P9_92_qep_pin>;
+		pinctrl-5 = <&P9_92_pruout_pin>;
+		pinctrl-6 = <&P9_92_pruin_pin>;
+	};
+
+	cape-universal {
+		compatible = "gpio-of-helper";
+		status = "okay";
+		pinctrl-names = "default";
+		pinctrl-0 = <>;
+
+		P8_07 {
+			gpio-name = "P8_07";
+			gpio = <&gpio2 2 0>;
+			input;
+			dir-changeable;
+		};
+		P8_08 {
+			gpio-name = "P8_08";
+			gpio = <&gpio2 3 0>;
+			input;
+			dir-changeable;
+		};
+		P8_09 {
+			gpio-name = "P8_09";
+			gpio = <&gpio2 5 0>;
+			input;
+			dir-changeable;
+		};
+		P8_10 {
+			gpio-name = "P8_10";
+			gpio = <&gpio2 4 0>;
+			input;
+			dir-changeable;
+		};
+		P8_11 {
+			gpio-name = "P8_11";
+			gpio = <&gpio1 13 0>;
+			input;
+			dir-changeable;
+		};
+		P8_12 {
+			gpio-name = "P8_12";
+			gpio = <&gpio1 12 0>;
+			input;
+			dir-changeable;
+		};
+		P8_13 {
+			gpio-name = "P8_13";
+			gpio = <&gpio0 23 0>;
+			input;
+			dir-changeable;
+		};
+		P8_14 {
+			gpio-name = "P8_14";
+			gpio = <&gpio0 26 0>;
+			input;
+			dir-changeable;
+		};
+		P8_15 {
+			gpio-name = "P8_15";
+			gpio = <&gpio1 15 0>;
+			input;
+			dir-changeable;
+		};
+		P8_16 {
+			gpio-name = "P8_16";
+			gpio = <&gpio1 14 0>;
+			input;
+			dir-changeable;
+		};
+		P8_17 {
+			gpio-name = "P8_17";
+			gpio = <&gpio0 27 0>;
+			input;
+			dir-changeable;
+		};
+		P8_18 {
+			gpio-name = "P8_18";
+			gpio = <&gpio2 1 0>;
+			input;
+			dir-changeable;
+		};
+		P8_19 {
+			gpio-name = "P8_19";
+			gpio = <&gpio0 22 0>;
+			input;
+			dir-changeable;
+		};
+
+		P8_26 {
+			gpio-name = "P8_26";
+			gpio = <&gpio1 29 0>;
+			input;
+			dir-changeable;
+		};
+		P8_27 {
+			gpio-name = "P8_27";
+			gpio = <&gpio2 22 0>;
+			input;
+			dir-changeable;
+		};
+		P8_28 {
+			gpio-name = "P8_28";
+			gpio = <&gpio2 24 0>;
+			input;
+			dir-changeable;
+		};
+		P8_29 {
+			gpio-name = "P8_29";
+			gpio = <&gpio2 23 0>;
+			input;
+			dir-changeable;
+		};
+		P8_30 {
+			gpio-name = "P8_30";
+			gpio = <&gpio2 25 0>;
+			input;
+			dir-changeable;
+		};
+		P8_31 {
+			gpio-name = "P8_31";
+			gpio = <&gpio0 10 0>;
+			input;
+			dir-changeable;
+		};
+		P8_32 {
+			gpio-name = "P8_32";
+			gpio = <&gpio0 11 0>;
+			input;
+			dir-changeable;
+		};
+		P8_33 {
+			gpio-name = "P8_33";
+			gpio = <&gpio0 9 0>;
+			input;
+			dir-changeable;
+		};
+		P8_34 {
+			gpio-name = "P8_34";
+			gpio = <&gpio2 17 0>;
+			input;
+			dir-changeable;
+		};
+		P8_35 {
+			gpio-name = "P8_35";
+			gpio = <&gpio0 8 0>;
+			input;
+			dir-changeable;
+		};
+		P8_36 {
+			gpio-name = "P8_36";
+			gpio = <&gpio2 16 0>;
+			input;
+			dir-changeable;
+		};
+		P8_37 {
+			gpio-name = "P8_37";
+			gpio = <&gpio2 14 0>;
+			input;
+			dir-changeable;
+		};
+		P8_38 {
+			gpio-name = "P8_38";
+			gpio = <&gpio2 15 0>;
+			input;
+			dir-changeable;
+		};
+		P8_39 {
+			gpio-name = "P8_39";
+			gpio = <&gpio2 12 0>;
+			input;
+			dir-changeable;
+		};
+		P8_40 {
+			gpio-name = "P8_40";
+			gpio = <&gpio2 13 0>;
+			input;
+			dir-changeable;
+		};
+		P8_41 {
+			gpio-name = "P8_41";
+			gpio = <&gpio2 10 0>;
+			input;
+			dir-changeable;
+		};
+		P8_42 {
+			gpio-name = "P8_42";
+			gpio = <&gpio2 11 0>;
+			input;
+			dir-changeable;
+		};
+		P8_43 {
+			gpio-name = "P8_43";
+			gpio = <&gpio2 8 0>;
+			input;
+			dir-changeable;
+		};
+		P8_44 {
+			gpio-name = "P8_44";
+			gpio = <&gpio2 9 0>;
+			input;
+			dir-changeable;
+		};
+		P8_45 {
+			gpio-name = "P8_45";
+			gpio = <&gpio2 6 0>;
+			input;
+			dir-changeable;
+		};
+		P8_46 {
+			gpio-name = "P8_46";
+			gpio = <&gpio2 7 0>;
+			input;
+			dir-changeable;
+		};
+
+
+		P9_11 {
+			gpio-name = "P9_11";
+			gpio = <&gpio0 30 0>;
+			input;
+			dir-changeable;
+		};
+		P9_12 {
+			gpio-name = "P9_12";
+			gpio = <&gpio1 28 0>;
+			input;
+			dir-changeable;
+		};
+		P9_13 {
+			gpio-name = "P9_13";
+			gpio = <&gpio0 31 0>;
+			input;
+			dir-changeable;
+		};
+		P9_14 {
+			gpio-name = "P9_14";
+			gpio = <&gpio1 18 0>;
+			input;
+			dir-changeable;
+		};
+		P9_15 {
+			gpio-name = "P9_15";
+			gpio = <&gpio1 16 0>;
+			input;
+			dir-changeable;
+		};
+		P9_16 {
+			gpio-name = "P9_16";
+			gpio = <&gpio1 19 0>;
+			input;
+			dir-changeable;
+		};
+		P9_17 {
+			gpio-name = "P9_17";
+			gpio = <&gpio0 5 0>;
+			input;
+			dir-changeable;
+		};
+		P9_18 {
+			gpio-name = "P9_18";
+			gpio = <&gpio0 4 0>;
+			input;
+			dir-changeable;
+		};
+		P9_19 {
+			gpio-name = "P9_19";
+			gpio = <&gpio0 13 0>;
+			input;
+			dir-changeable;
+		};
+		P9_20 {
+			gpio-name = "P9_20";
+			gpio = <&gpio0 12 0>;
+			input;
+			dir-changeable;
+		};
+		P9_21 {
+			gpio-name = "P9_21";
+			gpio = <&gpio0 3 0>;
+			input;
+			dir-changeable;
+		};
+		P9_22 {
+			gpio-name = "P9_22";
+			gpio = <&gpio0 2 0>;
+			input;
+			dir-changeable;
+		};
+		P9_23 {
+			gpio-name = "P9_23";
+			gpio = <&gpio1 17 0>;
+			input;
+			dir-changeable;
+		};
+		P9_24 {
+			gpio-name = "P9_24";
+			gpio = <&gpio0 15 0>;
+			input;
+			dir-changeable;
+		};
+		P9_25 {
+			gpio-name = "P9_25";
+			gpio = <&gpio3 21 0>;
+			input;
+			dir-changeable;
+		};
+		P9_26 {
+			gpio-name = "P9_26";
+			gpio = <&gpio0 14 0>;
+			input;
+			dir-changeable;
+		};
+		P9_27 {
+			gpio-name = "P9_27";
+			gpio = <&gpio3 19 0>;
+			input;
+			dir-changeable;
+		};
+		P9_28 {
+			gpio-name = "P9_28";
+			gpio = <&gpio3 17 0>;
+			input;
+			dir-changeable;
+		};
+		P9_29 {
+			gpio-name = "P9_29";
+			gpio = <&gpio3 15 0>;
+			input;
+			dir-changeable;
+		};
+		P9_30 {
+			gpio-name = "P9_30";
+			gpio = <&gpio3 16 0>;
+			input;
+			dir-changeable;
+		};
+		P9_31 {
+			gpio-name = "P9_31";
+			gpio = <&gpio3 14 0>;
+			input;
+			dir-changeable;
+		};
+		P9_41 {
+			gpio-name = "P9_41";
+			gpio = <&gpio0 20 0>;
+			input;
+			dir-changeable;
+		};
+		P9_91 {
+			gpio-name = "P9_91";
+			gpio = <&gpio3 20 0>;
+			input;
+			dir-changeable;
+		};
+		P9_42 {
+			gpio-name = "P9_42";
+			gpio = <&gpio0 7 0>;
+			input;
+			dir-changeable;
+		};
+		P9_92 {
+			gpio-name = "P9_92";
+			gpio = <&gpio3 18 0>;
+			input;
+			dir-changeable;
+		};
+	};
+};
diff --git a/arch/arm/boot/dts/am335x-bone-common.dtsi b/arch/arm/boot/dts/am335x-bone-common.dtsi
index 007b5e5..54a3b8d 100644
--- a/arch/arm/boot/dts/am335x-bone-common.dtsi
+++ b/arch/arm/boot/dts/am335x-bone-common.dtsi
@@ -6,6 +6,8 @@
  * published by the Free Software Foundation.
  */
 
+#include <dt-bindings/mfd/tps65217.h>
+
 / {
 	cpus {
 		cpu@0 {
@@ -29,14 +31,14 @@
 		compatible = "gpio-leds";
 
 		led2 {
-			label = "beaglebone:green:heartbeat";
+			label = "beaglebone:green:usr0";
 			gpios = <&gpio1 21 GPIO_ACTIVE_HIGH>;
 			linux,default-trigger = "heartbeat";
 			default-state = "off";
 		};
 
 		led3 {
-			label = "beaglebone:green:mmc0";
+			label = "beaglebone:green:usr1";
 			gpios = <&gpio1 22 GPIO_ACTIVE_HIGH>;
 			linux,default-trigger = "mmc0";
 			default-state = "off";
@@ -66,9 +68,6 @@
 };
 
 &am33xx_pinmux {
-	pinctrl-names = "default";
-	pinctrl-0 = <&clkout2_pin>;
-
 	user_leds_s0: user_leds_s0 {
 		pinctrl-single,pins = <
 			AM33XX_IOPAD(0x854, PIN_OUTPUT_PULLDOWN | MUX_MODE7)	/* gpmc_a5.gpio1_21 */
@@ -99,15 +98,11 @@
 		>;
 	};
 
-	clkout2_pin: pinmux_clkout2_pin {
-		pinctrl-single,pins = <
-			AM33XX_IOPAD(0x9b4, PIN_OUTPUT_PULLDOWN | MUX_MODE3)	/* xdma_event_intr1.clkout2 */
-		>;
-	};
-
 	cpsw_default: cpsw_default {
 		pinctrl-single,pins = <
 			/* Slave 1 */
+			0x108 (PIN_INPUT | MUX_MODE0)		/* mii1_col.mii1_col */
+			0x10c (PIN_INPUT | MUX_MODE0)		/* mii1_crs.mii1_crs */
 			AM33XX_IOPAD(0x910, PIN_INPUT_PULLUP | MUX_MODE0)	/* mii1_rxerr.mii1_rxerr */
 			AM33XX_IOPAD(0x914, PIN_OUTPUT_PULLDOWN | MUX_MODE0)	/* mii1_txen.mii1_txen */
 			AM33XX_IOPAD(0x918, PIN_INPUT_PULLUP | MUX_MODE0)	/* mii1_rxdv.mii1_rxdv */
@@ -127,6 +122,8 @@
 	cpsw_sleep: cpsw_sleep {
 		pinctrl-single,pins = <
 			/* Slave 1 reset value */
+			0x108 (PIN_INPUT_PULLDOWN | MUX_MODE7)
+			0x10c (PIN_INPUT_PULLDOWN | MUX_MODE7)
 			AM33XX_IOPAD(0x910, PIN_INPUT_PULLDOWN | MUX_MODE7)
 			AM33XX_IOPAD(0x914, PIN_INPUT_PULLDOWN | MUX_MODE7)
 			AM33XX_IOPAD(0x918, PIN_INPUT_PULLDOWN | MUX_MODE7)
@@ -310,8 +307,23 @@
 	 * by the hardware problems. (Tip: double-check by performing a current
 	 * measurement after shutdown: it should be less than 1 mA.)
 	 */
+
+	interrupts = <7>; /* NMI */
+	interrupt-parent = <&intc>;
+
 	ti,pmic-shutdown-controller;
 
+	charger {
+		interrupts = <TPS65217_IRQ_AC>, <TPS65217_IRQ_USB>;
+		interrupts-names = "AC", "USB";
+		status = "okay";
+	};
+
+	pwrbutton {
+		interrupts = <TPS65217_IRQ_PB>;
+		status = "okay";
+	};
+
 	regulators {
 		dcdc1_reg: regulator@0 {
 			regulator-name = "vdds_dpr";
@@ -393,3 +405,32 @@
 &sham {
 	status = "okay";
 };
+
+&rtc {
+	system-power-controller;
+};
+
+/* the cape manager */
+/ {
+	bone_capemgr {
+		compatible = "ti,bone-capemgr";
+		status = "okay";
+
+		nvmem-cells = <&baseboard_data &cape0_data &cape1_data &cape2_data &cape3_data>;
+		nvmem-cell-names = "baseboard", "slot0", "slot1", "slot2", "slot3";
+		#slots = <4>;
+
+		/* map board revisions to compatible definitions */
+		baseboardmaps {
+			baseboard_beaglebone: board@0 {
+				board-name = "A335BONE";
+				compatible-name = "ti,beaglebone";
+			};
+
+			baseboard_beaglebone_black: board@1 {
+				board-name = "A335BNLT";
+				compatible-name = "ti,beaglebone-black";
+			};
+		};
+	};
+};
diff --git b/arch/arm/boot/dts/am335x-bone-emmc-in-reset.dtsi b/arch/arm/boot/dts/am335x-bone-emmc-in-reset.dtsi
new file mode 100644
index 0000000..7d8f673
--- /dev/null
+++ b/arch/arm/boot/dts/am335x-bone-emmc-in-reset.dtsi
@@ -0,0 +1,18 @@
+/*
+ * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/* standard */
+
+&gpio1 {
+	emmc_rst {
+		gpio-hog;
+		gpios = <20 0>;
+		output-high;
+		line-name = "EMMC ResetN";
+	};
+};
diff --git b/arch/arm/boot/dts/am335x-bone-jtag.dtsi b/arch/arm/boot/dts/am335x-bone-jtag.dtsi
new file mode 100644
index 0000000..603ef0a
--- /dev/null
+++ b/arch/arm/boot/dts/am335x-bone-jtag.dtsi
@@ -0,0 +1,20 @@
+/*
+ * Device Tree Source for bone jtag
+ *
+ * Copyright (C) 2015 Robert Nelson <robertcnelson@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+&am33xx_pinmux {
+	pinctrl-names = "default";
+	pinctrl-0 = <&clkout2_pin>;
+
+	clkout2_pin: pinmux_clkout2_pin {
+		pinctrl-single,pins = <
+			AM33XX_IOPAD(0x9b4, PIN_OUTPUT_PULLDOWN | MUX_MODE3)	/* xdma_event_intr1.clkout2 */
+		>;
+	};
+};
diff --git b/arch/arm/boot/dts/am335x-bone-pinmux-can0.dtsi b/arch/arm/boot/dts/am335x-bone-pinmux-can0.dtsi
new file mode 100644
index 0000000..0961216
--- /dev/null
+++ b/arch/arm/boot/dts/am335x-bone-pinmux-can0.dtsi
@@ -0,0 +1,45 @@
+/*
+ * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <dt-bindings/board/am335x-bbw-bbb-base.h>
+#include "am335x-peripheral-can0.dtsi"
+
+/* cape universal */
+
+/*
+ *&ocp {
+ *	P9_19_pinmux {
+ *		mode = "can";
+ *	};
+ *	P9_20_pinmux {
+ *		mode = "can";
+ *	};
+ *};
+ *
+ *&dcan0 {
+ *	pinctrl-0 = <>;
+ *};
+ *
+ */
+
+/* standard */
+
+&am33xx_pinmux {
+	dcan0_pins: pinmux_dcan0_pins {
+		pinctrl-single,pins = <
+			/* P9_20: uart1_ctsn.d_can0_tx */
+			BONE_P9_20 (PIN_OUTPUT_PULLUP | MUX_MODE2)
+			/* P9_19: uart1_rtsn.d_can0_rx */
+			BONE_P9_19 (PIN_INPUT_PULLUP | MUX_MODE2)
+		>;
+	};
+};
+
+&dcan0 {
+	pinctrl-0 = <&dcan0_pins>;
+};
diff --git b/arch/arm/boot/dts/am335x-bone-pinmux-can1.dtsi b/arch/arm/boot/dts/am335x-bone-pinmux-can1.dtsi
new file mode 100644
index 0000000..9e26413
--- /dev/null
+++ b/arch/arm/boot/dts/am335x-bone-pinmux-can1.dtsi
@@ -0,0 +1,45 @@
+/*
+ * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <dt-bindings/board/am335x-bbw-bbb-base.h>
+#include "am335x-peripheral-can1.dtsi"
+
+/* cape universal */
+
+/*
+ *&ocp {
+ *	P9_24_pinmux {
+ *		mode = "can";
+ *	};
+ *	P9_26_pinmux {
+ *		mode = "can";
+ *	};
+ *};
+ *
+ *&dcan1 {
+ *	pinctrl-0 = <>;
+ *};
+ *
+ */
+
+/* standard */
+
+&am33xx_pinmux {
+	dcan1_pins: pinmux_dcan1_pins {
+		pinctrl-single,pins = <
+			/* P9_26: uart1_rxd.d_can1_tx */
+			BONE_P9_26 (PIN_OUTPUT_PULLUP | MUX_MODE2)
+			/* P9_24: uart1_txd.d_can1_rx */
+			BONE_P9_24 (PIN_INPUT_PULLUP | MUX_MODE2)
+		>;
+	};
+};
+
+&dcan1 {
+	pinctrl-0 = <&dcan1_pins>;
+};
diff --git b/arch/arm/boot/dts/am335x-bone-pinmux-emmc.dtsi b/arch/arm/boot/dts/am335x-bone-pinmux-emmc.dtsi
new file mode 100644
index 0000000..22cf462
--- /dev/null
+++ b/arch/arm/boot/dts/am335x-bone-pinmux-emmc.dtsi
@@ -0,0 +1,88 @@
+/*
+ * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/* Testing */
+/* lsblk */
+
+#include <dt-bindings/board/am335x-bbw-bbb-base.h>
+#include "am335x-peripheral-emmc.dtsi"
+
+/* cape universal */
+
+/*
+ *&ocp {
+ *	P8_21_pinmux {
+ *		state = "disabled";
+ *	};
+ *	P8_20_pinmux {
+ *		state = "disabled";
+ *	};
+ *	P8_25_pinmux {
+ *		state = "disabled";
+ *	};
+ *	P8_24_pinmux {
+ *		state = "disabled";
+ *	};
+ *	P8_05_pinmux {
+ *		state = "disabled";
+ *	};
+ *	P8_06_pinmux {
+ *		state = "disabled";
+ *	};
+ *	P8_23_pinmux {
+ *		state = "disabled";
+ *	};
+ *	P8_22_pinmux {
+ *		state = "disabled";
+ *	};
+ *	P8_03_pinmux {
+ *		state = "disabled";
+ *	};
+ *	P8_04_pinmux {
+ *		state = "disabled";
+ *	};
+ *};
+ *
+ *&mmc2 {
+ *	pinctrl-0 = <>;
+ *};
+ *
+ */
+
+/* standard */
+
+&am33xx_pinmux {
+	emmc_pins: pinmux_emmc_pins {
+		pinctrl-single,pins = <
+			/* P8_21: gpmc_csn1.mmc1_clk */
+			BONE_P8_21 (PIN_INPUT_PULLUP | MUX_MODE2)
+			/* P8_20: gpmc_csn2.mmc1_cmd */
+			BONE_P8_20 (PIN_INPUT_PULLUP | MUX_MODE2)
+			/* P8_25: gpmc_ad0.mmc1_dat0 */
+			BONE_P8_25 (PIN_INPUT_PULLUP | MUX_MODE1)
+			/* P8_24: gpmc_ad1.mmc1_dat1 */
+			BONE_P8_24 (PIN_INPUT_PULLUP | MUX_MODE1)
+			/* P8_05: gpmc_ad2.mmc1_dat2 */
+			BONE_P8_05 (PIN_INPUT_PULLUP | MUX_MODE1)
+			/* P8_06: gpmc_ad3.mmc1_dat3 */
+			BONE_P8_06 (PIN_INPUT_PULLUP | MUX_MODE1)
+			/* P8_23: gpmc_ad4.mmc1_dat4 */
+			BONE_P8_23 (PIN_INPUT_PULLUP | MUX_MODE1)
+			/* P8_22: gpmc_ad5.mmc1_dat5 */
+			BONE_P8_22 (PIN_INPUT_PULLUP | MUX_MODE1)
+			/* P8_03: gpmc_ad6.mmc1_dat6 */
+			BONE_P8_03 (PIN_INPUT_PULLUP | MUX_MODE1)
+			/* P8_04: gpmc_ad7.mmc1_dat7 */
+			BONE_P8_04 (PIN_INPUT_PULLUP | MUX_MODE1)
+		>;
+	};
+};
+
+&mmc2 {
+	pinctrl-0 = <&emmc_pins>;
+};
diff --git b/arch/arm/boot/dts/am335x-bone-pinmux-i2c2.dtsi b/arch/arm/boot/dts/am335x-bone-pinmux-i2c2.dtsi
new file mode 100644
index 0000000..abf3b57
--- /dev/null
+++ b/arch/arm/boot/dts/am335x-bone-pinmux-i2c2.dtsi
@@ -0,0 +1,45 @@
+/*
+ * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <dt-bindings/board/am335x-bbw-bbb-base.h>
+#include "am335x-peripheral-i2c2.dtsi"
+
+/* cape universal */
+
+/*
+ *&ocp {
+ *	P9_19_pinmux {
+ *		mode = "i2c";
+ *	};
+ *	P9_20_pinmux {
+ *		mode = "i2c";
+ *	};
+ *};
+ *
+ *&dcan0 {
+ *	pinctrl-0 = <>;
+ *};
+ *
+ */
+
+/* standard */
+
+&am33xx_pinmux {
+	i2c2_pins: pinmux_i2c2_pins {
+		pinctrl-single,pins = <
+			/* P9_20: uart1_ctsn.i2c2_sda */
+			BONE_P9_20 (SLEWCTRL_SLOW | PIN_INPUT_PULLUP | MUX_MODE3)
+			/* P9_19: uart1_rtsn.i2c2_scl */
+			BONE_P9_19 (SLEWCTRL_SLOW | PIN_INPUT_PULLUP | MUX_MODE3)
+		>;
+	};
+};
+
+&i2c2 {
+	pinctrl-0 = <&i2c2_pins>;
+};
diff --git b/arch/arm/boot/dts/am335x-bone-pinmux-nxp-hdmi.dtsi b/arch/arm/boot/dts/am335x-bone-pinmux-nxp-hdmi.dtsi
new file mode 100644
index 0000000..5205fa0
--- /dev/null
+++ b/arch/arm/boot/dts/am335x-bone-pinmux-nxp-hdmi.dtsi
@@ -0,0 +1,120 @@
+/*
+ * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "am335x-peripheral-nxp-hdmi.dtsi"
+
+/* cape universal */
+
+/*
+ *&ocp {
+ *	P8_27_pinmux {
+ *		state = "disabled";
+ *	};
+ *	P8_28_pinmux {
+ *		state = "disabled";
+ *	};
+ *	P8_29_pinmux {
+ *		state = "disabled";
+ *	};
+ *	P8_30_pinmux {
+ *		state = "disabled";
+ *	};
+ *	P8_31_pinmux {
+ *		state = "disabled";
+ *	};
+ *	P8_32_pinmux {
+ *		state = "disabled";
+ *	};
+ *	P8_33_pinmux {
+ *		state = "disabled";
+ *	};
+ *	P8_34_pinmux {
+ *		state = "disabled";
+ *	};
+ *	P8_35_pinmux {
+ *		state = "disabled";
+ *	};
+ *	P8_36_pinmux {
+ *		state = "disabled";
+ *	};
+ *	P8_37_pinmux {
+ *		state = "disabled";
+ *	};
+ *	P8_38_pinmux {
+ *		state = "disabled";
+ *	};
+ *	P8_39_pinmux {
+ *		state = "disabled";
+ *	};
+ *	P8_40_pinmux {
+ *		state = "disabled";
+ *	};
+ *	P8_41_pinmux {
+ *		state = "disabled";
+ *	};
+ *	P8_42_pinmux {
+ *		state = "disabled";
+ *	};
+ *	P8_43_pinmux {
+ *		state = "disabled";
+ *	};
+ *	P8_44_pinmux {
+ *		state = "disabled";
+ *	};
+ *	P8_45_pinmux {
+ *		state = "disabled";
+ *	};
+ *	P8_46_pinmux {
+ *		state = "disabled";
+ *	};
+ *};
+ */
+
+/* standard */
+
+&am33xx_pinmux {
+	nxp_hdmi_pins: pinmux_nxp_hdmi_pins {
+		pinctrl-single,pins = <
+			AM33XX_IOPAD(0x9b0, PIN_OUTPUT_PULLDOWN | MUX_MODE3)	/* xdma_event_intr0 */
+			AM33XX_IOPAD(0x8a0, PIN_OUTPUT | MUX_MODE0)		/* lcd_data0.lcd_data0 */
+			AM33XX_IOPAD(0x8a4, PIN_OUTPUT | MUX_MODE0)		/* lcd_data1.lcd_data1 */
+			AM33XX_IOPAD(0x8a8, PIN_OUTPUT | MUX_MODE0)		/* lcd_data2.lcd_data2 */
+			AM33XX_IOPAD(0x8ac, PIN_OUTPUT | MUX_MODE0)		/* lcd_data3.lcd_data3 */
+			AM33XX_IOPAD(0x8b0, PIN_OUTPUT | MUX_MODE0)		/* lcd_data4.lcd_data4 */
+			AM33XX_IOPAD(0x8b4, PIN_OUTPUT | MUX_MODE0)		/* lcd_data5.lcd_data5 */
+			AM33XX_IOPAD(0x8b8, PIN_OUTPUT | MUX_MODE0)		/* lcd_data6.lcd_data6 */
+			AM33XX_IOPAD(0x8bc, PIN_OUTPUT | MUX_MODE0)		/* lcd_data7.lcd_data7 */
+			AM33XX_IOPAD(0x8c0, PIN_OUTPUT | MUX_MODE0)		/* lcd_data8.lcd_data8 */
+			AM33XX_IOPAD(0x8c4, PIN_OUTPUT | MUX_MODE0)		/* lcd_data9.lcd_data9 */
+			AM33XX_IOPAD(0x8c8, PIN_OUTPUT | MUX_MODE0)		/* lcd_data10.lcd_data10 */
+			AM33XX_IOPAD(0x8cc, PIN_OUTPUT | MUX_MODE0)		/* lcd_data11.lcd_data11 */
+			AM33XX_IOPAD(0x8d0, PIN_OUTPUT | MUX_MODE0)		/* lcd_data12.lcd_data12 */
+			AM33XX_IOPAD(0x8d4, PIN_OUTPUT | MUX_MODE0)		/* lcd_data13.lcd_data13 */
+			AM33XX_IOPAD(0x8d8, PIN_OUTPUT | MUX_MODE0)		/* lcd_data14.lcd_data14 */
+			AM33XX_IOPAD(0x8dc, PIN_OUTPUT | MUX_MODE0)		/* lcd_data15.lcd_data15 */
+			AM33XX_IOPAD(0x8e0, PIN_OUTPUT_PULLDOWN | MUX_MODE0)	/* lcd_vsync.lcd_vsync */
+			AM33XX_IOPAD(0x8e4, PIN_OUTPUT_PULLDOWN | MUX_MODE0)	/* lcd_hsync.lcd_hsync */
+			AM33XX_IOPAD(0x8e8, PIN_OUTPUT_PULLDOWN | MUX_MODE0)	/* lcd_pclk.lcd_pclk */
+			AM33XX_IOPAD(0x8ec, PIN_OUTPUT_PULLDOWN | MUX_MODE0)	/* lcd_ac_bias_en.lcd_ac_bias_en */
+		>;
+	};
+
+	nxp_hdmi_off_pins: nxp_hdmi_off_pins {
+		pinctrl-single,pins = <
+			AM33XX_IOPAD(0x9b0, PIN_OUTPUT_PULLDOWN | MUX_MODE3)	/* xdma_event_intr0 */
+		>;
+	};
+};
+
+&i2c0 {
+	tda19988 {
+		pinctrl-names = "default", "off";
+		pinctrl-0 = <&nxp_hdmi_bonelt_pins>;
+		pinctrl-1 = <&nxp_hdmi_bonelt_off_pins>;
+	};
+};
diff --git b/arch/arm/boot/dts/am335x-bone-pinmux-panel-1024x600-24bit.dtsi b/arch/arm/boot/dts/am335x-bone-pinmux-panel-1024x600-24bit.dtsi
new file mode 100644
index 0000000..65e5fbb
--- /dev/null
+++ b/arch/arm/boot/dts/am335x-bone-pinmux-panel-1024x600-24bit.dtsi
@@ -0,0 +1,151 @@
+/*
+ * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <dt-bindings/board/am335x-bbw-bbb-base.h>
+#include "am335x-peripheral-panel-1024x600-24bit.dtsi"
+
+/* cape universal */
+
+/*
+ *&ocp {
+ *	P8_27_pinmux {
+ *		state = "disabled";
+ *	};
+ *	P8_28_pinmux {
+ *		state = "disabled";
+ *	};
+ *	P8_29_pinmux {
+ *		state = "disabled";
+ *	};
+ *	P8_30_pinmux {
+ *		state = "disabled";
+ *	};
+ *	P8_31_pinmux {
+ *		state = "disabled";
+ *	};
+ *	P8_32_pinmux {
+ *		state = "disabled";
+ *	};
+ *	P8_33_pinmux {
+ *		state = "disabled";
+ *	};
+ *	P8_34_pinmux {
+ *		state = "disabled";
+ *	};
+ *	P8_35_pinmux {
+ *		state = "disabled";
+ *	};
+ *	P8_36_pinmux {
+ *		state = "disabled";
+ *	};
+ *	P8_37_pinmux {
+ *		state = "disabled";
+ *	};
+ *	P8_38_pinmux {
+ *		state = "disabled";
+ *	};
+ *	P8_39_pinmux {
+ *		state = "disabled";
+ *	};
+ *	P8_40_pinmux {
+ *		state = "disabled";
+ *	};
+ *	P8_41_pinmux {
+ *		state = "disabled";
+ *	};
+ *	P8_42_pinmux {
+ *		state = "disabled";
+ *	};
+ *	P8_43_pinmux {
+ *		state = "disabled";
+ *	};
+ *	P8_44_pinmux {
+ *		state = "disabled";
+ *	};
+ *	P8_45_pinmux {
+ *		state = "disabled";
+ *	};
+ *	P8_46_pinmux {
+ *		state = "disabled";
+ *	};
+ *};
+ */
+
+/* standard */
+
+&am33xx_pinmux {
+	lcd_24bit_pins: pinmux_lcd_24bit_pins {
+		pinctrl-single,pins = <
+
+			/* P8_45: lcd_data0.lcd_data0 */
+			BONE_P8_45 (PIN_OUTPUT | MUX_MODE0)
+			/* P8_46: lcd_data1.lcd_data1 */
+			BONE_P8_46 (PIN_OUTPUT | MUX_MODE0)
+			/* P8_43: lcd_data2.lcd_data2 */
+			BONE_P8_43 (PIN_OUTPUT | MUX_MODE0)
+			/* P8_44: lcd_data3.lcd_data3 */
+			BONE_P8_44 (PIN_OUTPUT | MUX_MODE0)
+			/* P8_41: lcd_data4.lcd_data4 */
+			BONE_P8_41 (PIN_OUTPUT | MUX_MODE0)
+			/* P8_42: lcd_data5.lcd_data5 */
+			BONE_P8_42 (PIN_OUTPUT | MUX_MODE0)
+			/* P8_39: lcd_data6.lcd_data6 */
+			BONE_P8_39 (PIN_OUTPUT | MUX_MODE0)
+			/* P8_40: lcd_data7.lcd_data7 */
+			BONE_P8_40 (PIN_OUTPUT | MUX_MODE0)
+			/* P8_37: lcd_data8.lcd_data8 */
+			BONE_P8_37 (PIN_OUTPUT | MUX_MODE0)
+			/* P8_38: lcd_data9.lcd_data9 */
+			BONE_P8_38 (PIN_OUTPUT | MUX_MODE0)
+			/* P8_36: lcd_data10.lcd_data10 */
+			BONE_P8_36 (PIN_OUTPUT | MUX_MODE0)
+			/* P8_34: lcd_data11.lcd_data11 */
+			BONE_P8_34 (PIN_OUTPUT | MUX_MODE0)
+			/* P8_35: lcd_data12.lcd_data12 */
+			BONE_P8_35 (PIN_OUTPUT | MUX_MODE0)
+			/* P8_33: lcd_data13.lcd_data13 */
+			BONE_P8_33 (PIN_OUTPUT | MUX_MODE0)
+			/* P8_31: lcd_data14.lcd_data14 */
+			BONE_P8_31 (PIN_OUTPUT | MUX_MODE0)
+			/* P8_32: lcd_data15.lcd_data15 */
+			BONE_P8_32 (PIN_OUTPUT | MUX_MODE0)
+
+			/* gpmc_ad15.lcd_data16 */
+			BONE_P8_15 (PIN_OUTPUT | MUX_MODE1)
+			/* gpmc_ad14.lcd_data17 */
+			BONE_P8_16 (PIN_OUTPUT | MUX_MODE1)
+			/* gpmc_ad13.lcd_data18 */
+			BONE_P8_11 (PIN_OUTPUT | MUX_MODE1)
+			/* gpmc_ad12.lcd_data19 */
+			BONE_P8_12 (PIN_OUTPUT | MUX_MODE1)
+			/* gpmc_ad11.lcd_data20 */
+			BONE_P8_17 (PIN_OUTPUT | MUX_MODE1)
+			/* gpmc_ad10.lcd_data21 */
+			BONE_P8_14 (PIN_OUTPUT | MUX_MODE1)
+			/* gpmc_ad9.lcd_data22 */
+			BONE_P8_13 (PIN_OUTPUT | MUX_MODE1)
+			/* gpmc_ad8.lcd_data23 */
+			BONE_P8_19 (PIN_OUTPUT | MUX_MODE1)
+
+			/* P8_27: lcd_vsync.lcd_vsync */
+			BONE_P8_27 (PIN_OUTPUT | MUX_MODE0)
+			/* P8_29: lcd_hsync.lcd_hsync */
+			BONE_P8_29 (PIN_OUTPUT | MUX_MODE0)
+			/* P8_28: lcd_pclk.lcd_pclk*/
+			BONE_P8_28 (PIN_OUTPUT | MUX_MODE0)
+			/* P8_30: lcd_ac_bias_en.lcd_ac_bias_en */
+			BONE_P8_30 (PIN_OUTPUT | MUX_MODE0)
+		>;
+	};
+};
+
+/ {
+	panel {
+		pinctrl-0 = <&lcd_24bit_pins>;
+	};
+};
diff --git b/arch/arm/boot/dts/am335x-bone-pinmux-spi0.dtsi b/arch/arm/boot/dts/am335x-bone-pinmux-spi0.dtsi
new file mode 100644
index 0000000..354e66a
--- /dev/null
+++ b/arch/arm/boot/dts/am335x-bone-pinmux-spi0.dtsi
@@ -0,0 +1,51 @@
+/*
+ * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <dt-bindings/board/am335x-bbw-bbb-base.h>
+#include "am335x-peripheral-spi0.dtsi"
+
+/* cape universal */
+
+/*
+ *&ocp {
+ *	P9_17_pinmux {
+ *		status = "disabled";
+ *	};
+ *	P9_18_pinmux {
+ *		status = "disabled";
+ *	};
+ *	P9_21_pinmux {
+ *		status = "disabled";
+ *	};
+ *	P9_22_pinmux {
+ *		status = "disabled";
+ *	};
+ *};
+ *
+ *&spi0 {
+ *	pinctrl-0 = <>;
+ *};
+ *
+ */
+
+/* standard */
+
+&am33xx_pinmux {
+	spi0_pins: pinmux_spi0_pins {
+		pinctrl-single,pins = <
+			0x150 (PIN_INPUT_PULLUP | MUX_MODE0)	/* spi0_sclk.spi0_sclk */
+			0x154 (PIN_INPUT_PULLUP | MUX_MODE0)	/* spi0_d0.spi0_d0 */
+			0x158 (PIN_OUTPUT_PULLUP | MUX_MODE0)	/* spi0_d1.spi0_d1 */
+			0x15c (PIN_OUTPUT_PULLUP | MUX_MODE0)	/* spi0_cs0.spi0_cs0 */
+		>;
+	};
+};
+
+&spi0 {
+	pinctrl-0 = <&spi0_pins>;
+};
diff --git b/arch/arm/boot/dts/am335x-bone-pinmux-spi1.dtsi b/arch/arm/boot/dts/am335x-bone-pinmux-spi1.dtsi
new file mode 100644
index 0000000..bff7f8d
--- /dev/null
+++ b/arch/arm/boot/dts/am335x-bone-pinmux-spi1.dtsi
@@ -0,0 +1,27 @@
+/*
+ * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <dt-bindings/board/am335x-bbw-bbb-base.h>
+#include "am335x-peripheral-spi1.dtsi"
+
+/* standard */
+
+&am33xx_pinmux {
+	spi1_pins: pinmux_spi1_pins {
+		pinctrl-single,pins = <
+			0x190 0x33	/* mcasp0_aclkx.spi1_sclk, INPUT_PULLUP | MODE3 */
+			0x194 0x33	/* mcasp0_fsx.spi1_d0, INPUT_PULLUP | MODE3 */
+			0x198 0x13	/* mcasp0_axr0.spi1_d1, OUTPUT_PULLUP | MODE3 */
+			0x19c 0x13	/* mcasp0_ahclkr.spi1_cs0, OUTPUT_PULLUP | MODE3 */
+			// 0x164 0x12	/* eCAP0_in_PWM0_out.spi1_cs1 OUTPUT_PULLUP | MODE2 */		>;
+	};
+};
+
+&spi1 {
+	pinctrl-0 = <&spi1_pins>;
+};
diff --git b/arch/arm/boot/dts/am335x-bone-pinmux-spi1a.dtsi b/arch/arm/boot/dts/am335x-bone-pinmux-spi1a.dtsi
new file mode 100644
index 0000000..62874c8
--- /dev/null
+++ b/arch/arm/boot/dts/am335x-bone-pinmux-spi1a.dtsi
@@ -0,0 +1,28 @@
+/*
+ * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <dt-bindings/board/am335x-bbw-bbb-base.h>
+#include "am335x-peripheral-spi1.dtsi"
+
+/* standard */
+
+&am33xx_pinmux {
+	spi1a_pins: pinmux_spi1a_pins {
+		pinctrl-single,pins = <
+			0x164 0x34	/* eCAP0_in_PWM0_out.spi1_sclk, INPUT_PULLUP | MODE4 */
+					/* NOTE: P9.42 is connected to two pads */
+			// 0x1A0 0x27	/* set the other pad to gpio input */
+			0x194 0x33	/* mcasp0_fsx.spi1_d0, INPUT_PULLUP | MODE3 */
+			0x198 0x13	/* mcasp0_axr0.spi1_d1, OUTPUT_PULLUP | MODE3 */
+			0x178 0x14	/* uart1_ctsn.spi1_cs0, OUTPUT_PULLUP | MODE4 */		>;
+	};
+};
+
+&spi1 {
+	pinctrl-0 = <&spi1a_pins>;
+};
diff --git b/arch/arm/boot/dts/am335x-bone-pinmux-ttyS1.dtsi b/arch/arm/boot/dts/am335x-bone-pinmux-ttyS1.dtsi
new file mode 100644
index 0000000..ae5b813
--- /dev/null
+++ b/arch/arm/boot/dts/am335x-bone-pinmux-ttyS1.dtsi
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/* Testing */
+/* sudo /sbin/getty -L ttyS1 115200 vt102 */
+
+#include <dt-bindings/board/am335x-bbw-bbb-base.h>
+#include "am335x-peripheral-ttyS1.dtsi"
+
+/* cape universal */
+
+/*
+ *&ocp {
+ *	P9_24_pinmux {
+ *		mode = "uart";
+ *	};
+ *	P9_26_pinmux {
+ *		mode = "uart";
+ *	};
+ *};
+ *
+ *&uart1 {
+ *	pinctrl-0 = <>;
+ *};
+ *
+ */
+
+/* standard */
+
+&am33xx_pinmux {
+	uart1_pins: pinmux_uart1_pins {
+		pinctrl-single,pins = <
+			/* P9_24: uart1_txd.uart1_txd */
+			BONE_P9_24 (PIN_OUTPUT_PULLDOWN | MUX_MODE0)
+			/* P9_26: uart1_rxd.uart1_rxd */
+			BONE_P9_26 (PIN_INPUT_PULLUP | MUX_MODE0)
+		>;
+	};
+};
+
+&uart1 {
+	pinctrl-0 = <&uart1_pins>;
+};
diff --git b/arch/arm/boot/dts/am335x-bone-pinmux-ttyS2.dtsi b/arch/arm/boot/dts/am335x-bone-pinmux-ttyS2.dtsi
new file mode 100644
index 0000000..5fa593a
--- /dev/null
+++ b/arch/arm/boot/dts/am335x-bone-pinmux-ttyS2.dtsi
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/* Testing */
+/* sudo /sbin/getty -L ttyS2 115200 vt102 */
+
+#include <dt-bindings/board/am335x-bbw-bbb-base.h>
+#include "am335x-peripheral-ttyS2.dtsi"
+
+/* cape universal */
+
+/*
+ *&ocp {
+ *	P9_21_pinmux {
+ *		mode = "uart";
+ *	};
+ *	P9_22_pinmux {
+ *		mode = "uart";
+ *	};
+ *};
+ *
+ *&uart2 {
+ *	pinctrl-0 = <>;
+ *};
+ *
+ */
+
+/* standard */
+
+&am33xx_pinmux {
+	uart2_pins: pinmux_uart2_pins {
+		pinctrl-single,pins = <
+			/* P9_21: spi0_d0.uart2_txd */
+			BONE_P9_21 (PIN_OUTPUT_PULLDOWN | MUX_MODE1)
+			/* P9_22: spi0_sclk.uart2_rxd */
+			BONE_P9_22 (PIN_INPUT_PULLUP | MUX_MODE1)
+		>;
+	};
+};
+
+&uart2 {
+	pinctrl-0 = <&uart2_pins>;
+};
diff --git b/arch/arm/boot/dts/am335x-bone-pinmux-ttyS4.dtsi b/arch/arm/boot/dts/am335x-bone-pinmux-ttyS4.dtsi
new file mode 100644
index 0000000..1d22a95
--- /dev/null
+++ b/arch/arm/boot/dts/am335x-bone-pinmux-ttyS4.dtsi
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/* Testing */
+/* sudo /sbin/getty -L ttyS4 115200 vt102 */
+
+#include <dt-bindings/board/am335x-bbw-bbb-base.h>
+#include "am335x-peripheral-ttyS4.dtsi"
+
+/* cape universal */
+
+/*
+ *&ocp {
+ *	P9_11_pinmux {
+ *		mode = "uart";
+ *	};
+ *	P9_13_pinmux {
+ *		mode = "uart";
+ *	};
+ *};
+ *
+ *&uart4 {
+ *	pinctrl-0 = <>;
+ *};
+ *
+ */
+
+/* standard */
+
+&am33xx_pinmux {
+	uart4_pins: pinmux_uart4_pins {
+		pinctrl-single,pins = <
+			/* P9_11: gpmc_wait0.uart4_rxd_mux2 */
+			BONE_P9_11 (PIN_INPUT_PULLUP | MUX_MODE6)
+			/* P9_13: gpmc_wpn.uart4_txd_mux2  */
+			BONE_P9_13 (PIN_OUTPUT_PULLDOWN | MUX_MODE6)
+		>;
+	};
+};
+
+&uart4 {
+	pinctrl-0 = <&uart4_pins>;
+};
diff --git b/arch/arm/boot/dts/am335x-bone-pinmux-ttyS5.dtsi b/arch/arm/boot/dts/am335x-bone-pinmux-ttyS5.dtsi
new file mode 100644
index 0000000..01d0aec
--- /dev/null
+++ b/arch/arm/boot/dts/am335x-bone-pinmux-ttyS5.dtsi
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/* Testing */
+/* sudo /sbin/getty -L ttyS5 115200 vt102 */
+
+#include <dt-bindings/board/am335x-bbw-bbb-base.h>
+#include "am335x-peripheral-ttyS5.dtsi"
+
+/* cape universal */
+
+/*
+ *&ocp {
+ *	P8_37_pinmux {
+ *		mode = "uart";
+ *	};
+ *	P8_38_pinmux {
+ *		mode = "uart";
+ *	};
+ *};
+ *
+ *&uart5 {
+ *	pinctrl-0 = <>;
+ *};
+ *
+ */
+
+/* standard */
+
+&am33xx_pinmux {
+	uart5_pins: pinmux_uart5_pins {
+		pinctrl-single,pins = <
+			/* P8_38: lcd_data9.uart5_rxd */
+			BONE_P8_38 (PIN_INPUT_PULLUP | MUX_MODE4)
+			/* P8_37: lcd_data8.uart5_txd */
+			BONE_P8_37 (PIN_OUTPUT_PULLDOWN | MUX_MODE4)
+		>;
+	};
+};
+
+&uart5 {
+	pinctrl-0 = <&uart5_pins>;
+};
diff --git a/arch/arm/boot/dts/am335x-bone.dts b/arch/arm/boot/dts/am335x-bone.dts
index 6b84937..3688fff 100644
--- a/arch/arm/boot/dts/am335x-bone.dts
+++ b/arch/arm/boot/dts/am335x-bone.dts
@@ -9,6 +9,7 @@
 
 #include "am33xx.dtsi"
 #include "am335x-bone-common.dtsi"
+/* #include "am335x-bone-jtag.dtsi" */
 
 / {
 	model = "TI AM335x BeagleBone";
diff --git b/arch/arm/boot/dts/am335x-boneblack-audio.dts b/arch/arm/boot/dts/am335x-boneblack-audio.dts
new file mode 100644
index 0000000..cac3626
--- /dev/null
+++ b/arch/arm/boot/dts/am335x-boneblack-audio.dts
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+/dts-v1/;
+
+#include "am33xx.dtsi"
+#include "am335x-bone-common.dtsi"
+
+/ {
+	model = "TI AM335x BeagleBone Black";
+	compatible = "ti,am335x-bone-black", "ti,am335x-bone", "ti,am33xx";
+
+	clk_mcasp0_fixed: clk_mcasp0_fixed {
+	      #clock-cells = <0>;
+	      compatible = "fixed-clock";
+	      clock-frequency = <24576000>;
+	};
+
+	clk_mcasp0: clk_mcasp0 {
+	      #clock-cells = <0>;
+	      compatible = "gpio-gate-clock";
+	      clocks = <&clk_mcasp0_fixed>;
+	      enable-gpios = <&gpio1 27 GPIO_ACTIVE_HIGH>; /* BeagleBone Black Clk enable on GPIO1_27 */
+	};
+};
+
+&ldo3_reg {
+	regulator-min-microvolt = <1800000>;
+	regulator-max-microvolt = <1800000>;
+	regulator-always-on;
+};
+
+&mmc1 {
+	vmmc-supply = <&vmmcsd_fixed>;
+};
diff --git b/arch/arm/boot/dts/am335x-boneblack-bbb-exp-c.dts b/arch/arm/boot/dts/am335x-boneblack-bbb-exp-c.dts
new file mode 100644
index 0000000..8d795c0
--- /dev/null
+++ b/arch/arm/boot/dts/am335x-boneblack-bbb-exp-c.dts
@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+/dts-v1/;
+
+#include "am33xx.dtsi"
+#include "am335x-bone-common-no-capemgr.dtsi"
+
+/ {
+	model = "TI AM335x BeagleBone Black";
+	compatible = "ti,am335x-bone-black", "ti,am335x-bone", "ti,am33xx";
+};
+
+&ldo3_reg {
+	regulator-min-microvolt = <1800000>;
+	regulator-max-microvolt = <1800000>;
+	regulator-always-on;
+};
+
+&mmc1 {
+	vmmc-supply = <&vmmcsd_fixed>;
+};
+
+&mmc2 {
+	vmmc-supply = <&vmmcsd_fixed>;
+	pinctrl-names = "default";
+	pinctrl-0 = <&emmc_pins>;
+	bus-width = <8>;
+	status = "okay";
+};
+
+&cpu0_opp_table {
+	/*
+	 * All PG 2.0 silicon may not support 1GHz but some of the early
+	 * BeagleBone Blacks have PG 2.0 silicon which is guaranteed
+	 * to support 1GHz OPP so enable it for PG 2.0 on this board.
+	 */
+	oppnitro@1000000000 {
+		opp-supported-hw = <0x06 0x0100>;
+	};
+};
+
+#include "am335x-cape-bbb-exp-c.dtsi"
diff --git b/arch/arm/boot/dts/am335x-boneblack-bbb-exp-r.dts b/arch/arm/boot/dts/am335x-boneblack-bbb-exp-r.dts
new file mode 100644
index 0000000..5df881e
--- /dev/null
+++ b/arch/arm/boot/dts/am335x-boneblack-bbb-exp-r.dts
@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+/dts-v1/;
+
+#include "am33xx.dtsi"
+#include "am335x-bone-common-no-capemgr.dtsi"
+
+/ {
+	model = "TI AM335x BeagleBone Black";
+	compatible = "ti,am335x-bone-black", "ti,am335x-bone", "ti,am33xx";
+};
+
+&ldo3_reg {
+	regulator-min-microvolt = <1800000>;
+	regulator-max-microvolt = <1800000>;
+	regulator-always-on;
+};
+
+&mmc1 {
+	vmmc-supply = <&vmmcsd_fixed>;
+};
+
+&mmc2 {
+	vmmc-supply = <&vmmcsd_fixed>;
+	pinctrl-names = "default";
+	pinctrl-0 = <&emmc_pins>;
+	bus-width = <8>;
+	status = "okay";
+};
+
+&cpu0_opp_table {
+	/*
+	 * All PG 2.0 silicon may not support 1GHz but some of the early
+	 * BeagleBone Blacks have PG 2.0 silicon which is guaranteed
+	 * to support 1GHz OPP so enable it for PG 2.0 on this board.
+	 */
+	oppnitro@1000000000 {
+		opp-supported-hw = <0x06 0x0100>;
+	};
+};
+
+#include "am335x-cape-bbb-exp-r.dtsi"
diff --git b/arch/arm/boot/dts/am335x-boneblack-bbbmini.dts b/arch/arm/boot/dts/am335x-boneblack-bbbmini.dts
new file mode 100644
index 0000000..5ed89a2
--- /dev/null
+++ b/arch/arm/boot/dts/am335x-boneblack-bbbmini.dts
@@ -0,0 +1,207 @@
+/*
+ * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/
+ *                    Modified by Mirko Denecke <mirkix@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+/dts-v1/;
+
+#include "am33xx.dtsi"
+#include "am335x-bone-common.dtsi"
+
+#include <dt-bindings/board/am335x-bbw-bbb-base.h>
+#include <dt-bindings/pinctrl/am33xx.h>
+
+/ {
+	model = "TI AM335x BeagleBone Black";
+	compatible = "ti,am335x-bone-black", "ti,am335x-bone", "ti,am33xx";
+};
+
+&ldo3_reg {
+	regulator-min-microvolt = <1800000>;
+	regulator-max-microvolt = <1800000>;
+	regulator-always-on;
+};
+
+&mmc1 {
+	vmmc-supply = <&vmmcsd_fixed>;
+};
+
+&mmc2 {
+	vmmc-supply = <&vmmcsd_fixed>;
+	pinctrl-names = "default";
+	pinctrl-0 = <&emmc_pins>;
+	bus-width = <8>;
+	status = "okay";
+};
+
+&cpu0_opp_table {
+	/*
+	 * All PG 2.0 silicon may not support 1GHz but some of the early
+	 * BeagleBone Blacks have PG 2.0 silicon which is guaranteed
+	 * to support 1GHz OPP so enable it for PG 2.0 on this board.
+	 */
+	oppnitro@1000000000 {
+		opp-supported-hw = <0x06 0x0100>;
+	};
+};
+
+&am33xx_pinmux {
+	dcan1_pins: pinmux_dcan1_pins {
+		pinctrl-single,pins = <
+			/* P9_26: uart1_rxd.d_can1_tx */
+			BONE_P9_26 (PIN_OUTPUT_PULLUP | MUX_MODE2)
+			/* P9_24: uart1_txd.d_can1_rx */
+			BONE_P9_24 (PIN_INPUT_PULLUP | MUX_MODE2)
+		>;
+	};
+
+	pru_pins: pinmux_pru_pins {
+		pinctrl-single,pins = <
+			0x03c 0x35	/* ecap0_in_pwm0_out.pr1_ecap0_ecap_capin, MODE5 | INPUT_PULLUP | PRU, PPM-sum, SBUS, DSM  */
+
+			0x0e8 0x25	/* lcd_pclk.pr1_pru1_pru_r30_10, MODE5 | OUTPUT | PRU, CH_1 */
+			0x0e0 0x25	/* lcd_vsync.pr1_pru1_pru_r30_8, MODE5 | OUTPUT | PRU, CH_2 */
+			0x0ec 0x25	/* lcd_ac_bias_en.pr1_pru1_pru_r30_11, MODE5 | OUTPUT | PRU, CH_3 */
+			0x0e4 0x25	/* lcd_hsync.pr1_pru1_pru_r30_9, MODE5 | OUTPUT | PRU, CH_4 */
+			0x0bc 0x25	/* lcd_data7.pr1_pru1_pru_r30_7, MODE5 | OUTPUT | PRU, CH_5 */
+			0x0b8 0x25	/* lcd_data6.pr1_pru1_pru_r30_6, MODE5 | OUTPUT | PRU, CH_6 */
+			0x0b4 0x25	/* lcd_data5.pr1_pru1_pru_r30_5, MODE5 | OUTPUT | PRU, CH_7 */
+			0x0b0 0x25	/* lcd_data4.pr1_pru1_pru_r30_4, MODE5 | OUTPUT | PRU, CH_8 */
+			0x0ac 0x25	/* lcd_data3.pr1_pru1_pru_r30_3, MODE5 | OUTPUT | PRU, CH_9 */
+			0x0a8 0x25	/* lcd_data2.pr1_pru1_pru_r30_2, MODE5 | OUTPUT | PRU, CH_10 */
+			0x0a4 0x25	/* lcd_data1.pr1_pru1_pru_r30_1, MODE5 | OUTPUT | PRU, CH_11 */
+			0x0a0 0x25	/* lcd_data0.pr1_pru1_pru_r30_0, MODE5 | OUTPUT | PRU, CH_12 */
+
+			BONE_P8_12 (PIN_OUTPUT_PULLDOWN | MUX_MODE6) /* HC-SR04 TRIG */
+			BONE_P8_16 (PIN_INPUT_PULLDOWN | MUX_MODE6) /* HC-SR04 ECHO */
+
+			BONE_P9_25 (PIN_INPUT_PULLDOWN | MUX_MODE6) /* MPU9250 INT */
+		>;
+	};
+
+	spi0_pins: pinmux_spi0_pins {
+		pinctrl-single,pins = <
+			/* P9_22: spi0_sclk.spi0_sclk */
+			BONE_P9_22 (PIN_INPUT_PULLUP | MUX_MODE0)
+			/* P9_21: spi0_d0.spi0_d0 */
+			BONE_P9_21 (PIN_INPUT_PULLUP | MUX_MODE0)
+			/* P9_18: spi0_d1.spi0_d1 */
+			BONE_P9_18 (PIN_OUTPUT_PULLUP | MUX_MODE0)
+			/* P9_17: spi0_cs0.spi0_cs0 */
+			BONE_P9_17 (PIN_OUTPUT_PULLUP | MUX_MODE0)
+		>;
+	};
+
+	spi1_pins: pinmux_spi1_pins {
+		pinctrl-single,pins = <
+			/* P9_31: mcasp0_aclkx.spi1_sclk */
+			BONE_P9_31 (PIN_INPUT_PULLUP | MUX_MODE3)
+
+			/* P9_29: mcasp0_fsx.spi1_d0 */
+			BONE_P9_29 (PIN_INPUT_PULLUP | MUX_MODE3)
+
+			/* P9_30: mcasp0_axr0.spi1_d1 */
+			BONE_P9_30 (PIN_OUTPUT_PULLUP | MUX_MODE3)
+
+			/* P9_28: mcasp0_ahclkr.spi1_cs0 */
+			BONE_P9_28 (PIN_OUTPUT_PULLUP | MUX_MODE3)
+
+			/* P9_19: uart1_rtsn.spi1_cs1 */
+/*			BONE_P9_19 (PIN_OUTPUT_PULLUP | MUX_MODE4)*/
+
+			/* P9_42: ecap0_in_pwm0_out.spi1_cs1 */
+			BONE_P9_42A (PIN_OUTPUT_PULLUP | MUX_MODE2)
+		>;
+	};
+
+	uart4_pins: pinmux_uart4_pins {
+		pinctrl-single,pins = <
+			/* P9_11: gpmc_wait0.uart4_rxd_mux2 */
+			BONE_P9_11 (PIN_INPUT_PULLUP | MUX_MODE6)
+			/* P9_13: gpmc_wpn.uart4_txd_mux2  */
+			BONE_P9_13 (PIN_OUTPUT_PULLDOWN | MUX_MODE6)
+		>;
+	};
+
+	uart5_pins: pinmux_uart5_pins {
+		pinctrl-single,pins = <
+			/* P8_38: lcd_data9.uart5_rxd */
+			BONE_P8_38 (PIN_INPUT_PULLUP | MUX_MODE4)
+			/* P8_37: lcd_data8.uart5_txd */
+			BONE_P8_37 (PIN_OUTPUT_PULLDOWN | MUX_MODE4)
+		>;
+	};
+};
+
+&dcan1 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&dcan1_pins>;
+	status = "okay";
+};
+
+&i2c2 {
+	clock-frequency = <400000>;
+};
+
+&spi0 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&spi0_pins>;
+	status = "okay";
+
+	spi0_0 {
+		#address-cells = <1>;
+		#size-cells = <0>;
+		spi-max-frequency = <24000000>;
+		reg = <0>;
+		compatible = "spidev";
+	};
+};
+
+&spi1 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&spi1_pins>;
+	status = "okay";
+
+	spi1_0 {
+		#address-cells = <1>;
+		#size-cells = <0>;
+		reg = <0>;
+		spi-max-frequency = <24000000>;
+		compatible = "spidev";
+	};
+
+	spi1_1 {
+		#address-cells = <1>;
+		#size-cells = <0>;
+		reg = <1>;
+		spi-max-frequency = <24000000>;
+		compatible = "spidev";
+	};
+};
+
+&tscadc {
+	adc {
+		ti,adc-channels = <0 1>;
+	};
+};
+
+&pruss {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pru_pins>;
+	status = "okay";
+};
+
+&uart4 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&uart4_pins>;
+	status = "okay";
+};
+
+&uart5 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&uart5_pins>;
+	status = "okay";
+};
diff --git b/arch/arm/boot/dts/am335x-boneblack-cape-bone-argus.dts b/arch/arm/boot/dts/am335x-boneblack-cape-bone-argus.dts
new file mode 100644
index 0000000..c97c912
--- /dev/null
+++ b/arch/arm/boot/dts/am335x-boneblack-cape-bone-argus.dts
@@ -0,0 +1,105 @@
+/*
+ * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+/dts-v1/;
+
+#include "am33xx.dtsi"
+#include "am335x-bone-common-no-capemgr.dtsi"
+
+/ {
+	model = "TI AM335x BeagleBone Black";
+	compatible = "ti,am335x-bone-black", "ti,am335x-bone", "ti,am33xx";
+};
+
+&ldo3_reg {
+	regulator-min-microvolt = <1800000>;
+	regulator-max-microvolt = <1800000>;
+	regulator-always-on;
+};
+
+&mmc1 {
+	vmmc-supply = <&vmmcsd_fixed>;
+};
+
+&mmc2 {
+	vmmc-supply = <&vmmcsd_fixed>;
+	pinctrl-names = "default";
+	pinctrl-0 = <&emmc_pins>;
+	bus-width = <8>;
+	status = "okay";
+};
+
+&cpu0_opp_table {
+	/*
+	 * All PG 2.0 silicon may not support 1GHz but some of the early
+	 * BeagleBone Blacks have PG 2.0 silicon which is guaranteed
+	 * to support 1GHz OPP so enable it for PG 2.0 on this board.
+	 */
+	oppnitro@1000000000 {
+		opp-supported-hw = <0x06 0x0100>;
+	};
+};
+
+&am33xx_pinmux {
+	nxp_hdmi_bonelt_pins: nxp_hdmi_bonelt_pins {
+		pinctrl-single,pins = <
+			AM33XX_IOPAD(0x9b0, PIN_OUTPUT_PULLDOWN | MUX_MODE3)	/* xdma_event_intr0 */
+			AM33XX_IOPAD(0x8a0, PIN_OUTPUT | MUX_MODE0)		/* lcd_data0.lcd_data0 */
+			AM33XX_IOPAD(0x8a4, PIN_OUTPUT | MUX_MODE0)		/* lcd_data1.lcd_data1 */
+			AM33XX_IOPAD(0x8a8, PIN_OUTPUT | MUX_MODE0)		/* lcd_data2.lcd_data2 */
+			AM33XX_IOPAD(0x8ac, PIN_OUTPUT | MUX_MODE0)		/* lcd_data3.lcd_data3 */
+			AM33XX_IOPAD(0x8b0, PIN_OUTPUT | MUX_MODE0)		/* lcd_data4.lcd_data4 */
+			AM33XX_IOPAD(0x8b4, PIN_OUTPUT | MUX_MODE0)		/* lcd_data5.lcd_data5 */
+			AM33XX_IOPAD(0x8b8, PIN_OUTPUT | MUX_MODE0)		/* lcd_data6.lcd_data6 */
+			AM33XX_IOPAD(0x8bc, PIN_OUTPUT | MUX_MODE0)		/* lcd_data7.lcd_data7 */
+			AM33XX_IOPAD(0x8c0, PIN_OUTPUT | MUX_MODE0)		/* lcd_data8.lcd_data8 */
+			AM33XX_IOPAD(0x8c4, PIN_OUTPUT | MUX_MODE0)		/* lcd_data9.lcd_data9 */
+			AM33XX_IOPAD(0x8c8, PIN_OUTPUT | MUX_MODE0)		/* lcd_data10.lcd_data10 */
+			AM33XX_IOPAD(0x8cc, PIN_OUTPUT | MUX_MODE0)		/* lcd_data11.lcd_data11 */
+			AM33XX_IOPAD(0x8d0, PIN_OUTPUT | MUX_MODE0)		/* lcd_data12.lcd_data12 */
+			AM33XX_IOPAD(0x8d4, PIN_OUTPUT | MUX_MODE0)		/* lcd_data13.lcd_data13 */
+			AM33XX_IOPAD(0x8d8, PIN_OUTPUT | MUX_MODE0)		/* lcd_data14.lcd_data14 */
+			AM33XX_IOPAD(0x8dc, PIN_OUTPUT | MUX_MODE0)		/* lcd_data15.lcd_data15 */
+			AM33XX_IOPAD(0x8e0, PIN_OUTPUT_PULLDOWN | MUX_MODE0)	/* lcd_vsync.lcd_vsync */
+			AM33XX_IOPAD(0x8e4, PIN_OUTPUT_PULLDOWN | MUX_MODE0)	/* lcd_hsync.lcd_hsync */
+			AM33XX_IOPAD(0x8e8, PIN_OUTPUT_PULLDOWN | MUX_MODE0)	/* lcd_pclk.lcd_pclk */
+			AM33XX_IOPAD(0x8ec, PIN_OUTPUT_PULLDOWN | MUX_MODE0)	/* lcd_ac_bias_en.lcd_ac_bias_en */
+		>;
+	};
+	nxp_hdmi_bonelt_off_pins: nxp_hdmi_bonelt_off_pins {
+		pinctrl-single,pins = <
+			AM33XX_IOPAD(0x9b0, PIN_OUTPUT_PULLDOWN | MUX_MODE3)	/* xdma_event_intr0 */
+		>;
+	};
+};
+
+&lcdc {
+	status = "okay";
+	port {
+		lcdc_0: endpoint@0 {
+			remote-endpoint = <&hdmi_0>;
+		};
+	};
+};
+
+&i2c0 {
+	tda19988 {
+		compatible = "nxp,tda998x";
+		reg = <0x70>;
+		pinctrl-names = "default", "off";
+		pinctrl-0 = <&nxp_hdmi_bonelt_pins>;
+		pinctrl-1 = <&nxp_hdmi_bonelt_off_pins>;
+
+		port {
+			hdmi_0: endpoint@0 {
+				remote-endpoint = <&lcdc_0>;
+			};
+		};
+	};
+};
+
+#include "am335x-bone-argus.dtsi"
diff --git b/arch/arm/boot/dts/am335x-boneblack-emmc-overlay.dts b/arch/arm/boot/dts/am335x-boneblack-emmc-overlay.dts
new file mode 100644
index 0000000..ccd358e
--- /dev/null
+++ b/arch/arm/boot/dts/am335x-boneblack-emmc-overlay.dts
@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+/dts-v1/;
+
+#include "am33xx.dtsi"
+#include "am335x-bone-common.dtsi"
+/* #include <dt-bindings/display/tda998x.h> */
+/* #include "am335x-bone-jtag.dtsi" */
+
+/ {
+	model = "TI AM335x BeagleBone Black";
+	compatible = "ti,am335x-bone-black", "ti,am335x-bone", "ti,am33xx";
+};
+
+&ldo3_reg {
+	regulator-min-microvolt = <1800000>;
+	regulator-max-microvolt = <1800000>;
+	regulator-always-on;
+};
+
+&mmc1 {
+	vmmc-supply = <&vmmcsd_fixed>;
+};
+
+&mmc2 {
+	vmmc-supply = <&vmmcsd_fixed>;
+	pinctrl-names = "default";
+	pinctrl-0 = <&emmc_pins>;
+	bus-width = <8>;
+	status = "okay";
+};
+
+&cpu0_opp_table {
+	/*
+	 * All PG 2.0 silicon may not support 1GHz but some of the early
+	 * BeagleBone Blacks have PG 2.0 silicon which is guaranteed
+	 * to support 1GHz OPP so enable it for PG 2.0 on this board.
+	 */
+	oppnitro@1000000000 {
+		opp-supported-hw = <0x06 0x0100>;
+	};
+};
diff --git b/arch/arm/boot/dts/am335x-boneblack-hdmi-overlay.dts b/arch/arm/boot/dts/am335x-boneblack-hdmi-overlay.dts
new file mode 100644
index 0000000..0582e57
--- /dev/null
+++ b/arch/arm/boot/dts/am335x-boneblack-hdmi-overlay.dts
@@ -0,0 +1,169 @@
+/*
+ * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+/dts-v1/;
+
+#include "am33xx.dtsi"
+#include "am335x-bone-common.dtsi"
+#include <dt-bindings/display/tda998x.h>
+/* #include "am335x-bone-jtag.dtsi" */
+
+/ {
+	model = "TI AM335x BeagleBone Black";
+	compatible = "ti,am335x-bone-black", "ti,am335x-bone", "ti,am33xx";
+};
+
+&ldo3_reg {
+	regulator-min-microvolt = <1800000>;
+	regulator-max-microvolt = <1800000>;
+	regulator-always-on;
+};
+
+&mmc1 {
+	vmmc-supply = <&vmmcsd_fixed>;
+};
+
+/* EMMC in reset */
+&gpio1 {
+	emmc_rst {
+		gpio-hog;
+		gpios = <20 0>;
+		output-high;
+		line-name = "EMMC ResetN";
+	};
+};
+
+&cpu0_opp_table {
+	/*
+	 * All PG 2.0 silicon may not support 1GHz but some of the early
+	 * BeagleBone Blacks have PG 2.0 silicon which is guaranteed
+	 * to support 1GHz OPP so enable it for PG 2.0 on this board.
+	 */
+	oppnitro@1000000000 {
+		opp-supported-hw = <0x06 0x0100>;
+	};
+};
+
+&am33xx_pinmux {
+	nxp_hdmi_bonelt_pins: nxp_hdmi_bonelt_pins {
+		pinctrl-single,pins = <
+			AM33XX_IOPAD(0x9b0, PIN_OUTPUT_PULLDOWN | MUX_MODE3)	/* xdma_event_intr0 */
+			AM33XX_IOPAD(0x8a0, PIN_OUTPUT | MUX_MODE0)		/* lcd_data0.lcd_data0 */
+			AM33XX_IOPAD(0x8a4, PIN_OUTPUT | MUX_MODE0)		/* lcd_data1.lcd_data1 */
+			AM33XX_IOPAD(0x8a8, PIN_OUTPUT | MUX_MODE0)		/* lcd_data2.lcd_data2 */
+			AM33XX_IOPAD(0x8ac, PIN_OUTPUT | MUX_MODE0)		/* lcd_data3.lcd_data3 */
+			AM33XX_IOPAD(0x8b0, PIN_OUTPUT | MUX_MODE0)		/* lcd_data4.lcd_data4 */
+			AM33XX_IOPAD(0x8b4, PIN_OUTPUT | MUX_MODE0)		/* lcd_data5.lcd_data5 */
+			AM33XX_IOPAD(0x8b8, PIN_OUTPUT | MUX_MODE0)		/* lcd_data6.lcd_data6 */
+			AM33XX_IOPAD(0x8bc, PIN_OUTPUT | MUX_MODE0)		/* lcd_data7.lcd_data7 */
+			AM33XX_IOPAD(0x8c0, PIN_OUTPUT | MUX_MODE0)		/* lcd_data8.lcd_data8 */
+			AM33XX_IOPAD(0x8c4, PIN_OUTPUT | MUX_MODE0)		/* lcd_data9.lcd_data9 */
+			AM33XX_IOPAD(0x8c8, PIN_OUTPUT | MUX_MODE0)		/* lcd_data10.lcd_data10 */
+			AM33XX_IOPAD(0x8cc, PIN_OUTPUT | MUX_MODE0)		/* lcd_data11.lcd_data11 */
+			AM33XX_IOPAD(0x8d0, PIN_OUTPUT | MUX_MODE0)		/* lcd_data12.lcd_data12 */
+			AM33XX_IOPAD(0x8d4, PIN_OUTPUT | MUX_MODE0)		/* lcd_data13.lcd_data13 */
+			AM33XX_IOPAD(0x8d8, PIN_OUTPUT | MUX_MODE0)		/* lcd_data14.lcd_data14 */
+			AM33XX_IOPAD(0x8dc, PIN_OUTPUT | MUX_MODE0)		/* lcd_data15.lcd_data15 */
+			AM33XX_IOPAD(0x8e0, PIN_OUTPUT_PULLDOWN | MUX_MODE0)	/* lcd_vsync.lcd_vsync */
+			AM33XX_IOPAD(0x8e4, PIN_OUTPUT_PULLDOWN | MUX_MODE0)	/* lcd_hsync.lcd_hsync */
+			AM33XX_IOPAD(0x8e8, PIN_OUTPUT_PULLDOWN | MUX_MODE0)	/* lcd_pclk.lcd_pclk */
+			AM33XX_IOPAD(0x8ec, PIN_OUTPUT_PULLDOWN | MUX_MODE0)	/* lcd_ac_bias_en.lcd_ac_bias_en */
+		>;
+	};
+	nxp_hdmi_bonelt_off_pins: nxp_hdmi_bonelt_off_pins {
+		pinctrl-single,pins = <
+			AM33XX_IOPAD(0x9b0, PIN_OUTPUT_PULLDOWN | MUX_MODE3)	/* xdma_event_intr0 */
+		>;
+	};
+
+	mcasp0_pins: mcasp0_pins {
+		pinctrl-single,pins = <
+			AM33XX_IOPAD(0x9ac, PIN_INPUT_PULLUP | MUX_MODE0) /* mcasp0_ahcklx.mcasp0_ahclkx */
+			AM33XX_IOPAD(0x99c, PIN_OUTPUT_PULLDOWN | MUX_MODE2) /* mcasp0_ahclkr.mcasp0_axr2*/
+			AM33XX_IOPAD(0x994, PIN_OUTPUT_PULLUP | MUX_MODE0) /* mcasp0_fsx.mcasp0_fsx */
+			AM33XX_IOPAD(0x990, PIN_OUTPUT_PULLDOWN | MUX_MODE0) /* mcasp0_aclkx.mcasp0_aclkx */
+			AM33XX_IOPAD(0x86c, PIN_OUTPUT_PULLDOWN | MUX_MODE7) /* gpmc_a11.GPIO1_27 */
+		>;
+	};
+};
+
+&lcdc {
+	status = "okay";
+	port {
+		lcdc_0: endpoint@0 {
+			remote-endpoint = <&hdmi_0>;
+		};
+	};
+};
+
+&i2c0 {
+	tda19988: tda19988 {
+		compatible = "nxp,tda998x";
+		reg = <0x70>;
+
+		pinctrl-names = "default", "off";
+		pinctrl-0 = <&nxp_hdmi_bonelt_pins>;
+		pinctrl-1 = <&nxp_hdmi_bonelt_off_pins>;
+
+		#sound-dai-cells = <0>;
+		audio-ports = <	TDA998x_I2S	0x03>;
+
+		ports {
+			port@0 {
+				hdmi_0: endpoint@0 {
+					remote-endpoint = <&lcdc_0>;
+				};
+			};
+		};
+	};
+};
+
+&mcasp0	{
+	#sound-dai-cells = <0>;
+	pinctrl-names = "default";
+	pinctrl-0 = <&mcasp0_pins>;
+	status = "okay";
+	op-mode = <0>;	/* MCASP_IIS_MODE */
+	tdm-slots = <2>;
+	serial-dir = <	/* 0: INACTIVE, 1: TX, 2: RX */
+			0 0 1 0
+		>;
+	tx-num-evt = <32>;
+	rx-num-evt = <32>;
+};
+
+/ {
+	clk_mcasp0_fixed: clk_mcasp0_fixed {
+		#clock-cells = <0>;
+		compatible = "fixed-clock";
+		clock-frequency = <24576000>;
+	};
+
+	clk_mcasp0: clk_mcasp0 {
+		#clock-cells = <0>;
+		compatible = "gpio-gate-clock";
+		clocks = <&clk_mcasp0_fixed>;
+		enable-gpios = <&gpio1 27 0>; /* BeagleBone Black Clk enable on GPIO1_27 */
+	};
+
+	sound {
+		compatible = "simple-audio-card";
+		simple-audio-card,name = "TI BeagleBone Black";
+		simple-audio-card,format = "i2s";
+		simple-audio-card,bitclock-master = <&dailink0_master>;
+		simple-audio-card,frame-master = <&dailink0_master>;
+
+		dailink0_master: simple-audio-card,cpu {
+			sound-dai = <&mcasp0>;
+			clocks = <&clk_mcasp0>;
+		};
+
+		simple-audio-card,codec {
+			sound-dai = <&tda19988>;
+		};
+	};
+};
diff --git b/arch/arm/boot/dts/am335x-boneblack-nhdmi-overlay.dts b/arch/arm/boot/dts/am335x-boneblack-nhdmi-overlay.dts
new file mode 100644
index 0000000..c166c90
--- /dev/null
+++ b/arch/arm/boot/dts/am335x-boneblack-nhdmi-overlay.dts
@@ -0,0 +1,110 @@
+/*
+ * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+/dts-v1/;
+
+#include "am33xx.dtsi"
+#include "am335x-bone-common.dtsi"
+#include <dt-bindings/display/tda998x.h>
+/* #include "am335x-bone-jtag.dtsi" */
+
+/ {
+	model = "TI AM335x BeagleBone Black";
+	compatible = "ti,am335x-bone-black", "ti,am335x-bone", "ti,am33xx";
+};
+
+&ldo3_reg {
+	regulator-min-microvolt = <1800000>;
+	regulator-max-microvolt = <1800000>;
+	regulator-always-on;
+};
+
+&mmc1 {
+	vmmc-supply = <&vmmcsd_fixed>;
+};
+
+/* EMMC in reset */
+&gpio1 {
+	emmc_rst {
+		gpio-hog;
+		gpios = <20 0>;
+		output-high;
+		line-name = "EMMC ResetN";
+	};
+};
+
+&cpu0_opp_table {
+	/*
+	 * All PG 2.0 silicon may not support 1GHz but some of the early
+	 * BeagleBone Blacks have PG 2.0 silicon which is guaranteed
+	 * to support 1GHz OPP so enable it for PG 2.0 on this board.
+	 */
+	oppnitro@1000000000 {
+		opp-supported-hw = <0x06 0x0100>;
+	};
+};
+
+&am33xx_pinmux {
+	nxp_hdmi_bonelt_pins: nxp_hdmi_bonelt_pins {
+		pinctrl-single,pins = <
+			AM33XX_IOPAD(0x9b0, PIN_OUTPUT_PULLDOWN | MUX_MODE3)	/* xdma_event_intr0 */
+			AM33XX_IOPAD(0x8a0, PIN_OUTPUT | MUX_MODE0)		/* lcd_data0.lcd_data0 */
+			AM33XX_IOPAD(0x8a4, PIN_OUTPUT | MUX_MODE0)		/* lcd_data1.lcd_data1 */
+			AM33XX_IOPAD(0x8a8, PIN_OUTPUT | MUX_MODE0)		/* lcd_data2.lcd_data2 */
+			AM33XX_IOPAD(0x8ac, PIN_OUTPUT | MUX_MODE0)		/* lcd_data3.lcd_data3 */
+			AM33XX_IOPAD(0x8b0, PIN_OUTPUT | MUX_MODE0)		/* lcd_data4.lcd_data4 */
+			AM33XX_IOPAD(0x8b4, PIN_OUTPUT | MUX_MODE0)		/* lcd_data5.lcd_data5 */
+			AM33XX_IOPAD(0x8b8, PIN_OUTPUT | MUX_MODE0)		/* lcd_data6.lcd_data6 */
+			AM33XX_IOPAD(0x8bc, PIN_OUTPUT | MUX_MODE0)		/* lcd_data7.lcd_data7 */
+			AM33XX_IOPAD(0x8c0, PIN_OUTPUT | MUX_MODE0)		/* lcd_data8.lcd_data8 */
+			AM33XX_IOPAD(0x8c4, PIN_OUTPUT | MUX_MODE0)		/* lcd_data9.lcd_data9 */
+			AM33XX_IOPAD(0x8c8, PIN_OUTPUT | MUX_MODE0)		/* lcd_data10.lcd_data10 */
+			AM33XX_IOPAD(0x8cc, PIN_OUTPUT | MUX_MODE0)		/* lcd_data11.lcd_data11 */
+			AM33XX_IOPAD(0x8d0, PIN_OUTPUT | MUX_MODE0)		/* lcd_data12.lcd_data12 */
+			AM33XX_IOPAD(0x8d4, PIN_OUTPUT | MUX_MODE0)		/* lcd_data13.lcd_data13 */
+			AM33XX_IOPAD(0x8d8, PIN_OUTPUT | MUX_MODE0)		/* lcd_data14.lcd_data14 */
+			AM33XX_IOPAD(0x8dc, PIN_OUTPUT | MUX_MODE0)		/* lcd_data15.lcd_data15 */
+			AM33XX_IOPAD(0x8e0, PIN_OUTPUT_PULLDOWN | MUX_MODE0)	/* lcd_vsync.lcd_vsync */
+			AM33XX_IOPAD(0x8e4, PIN_OUTPUT_PULLDOWN | MUX_MODE0)	/* lcd_hsync.lcd_hsync */
+			AM33XX_IOPAD(0x8e8, PIN_OUTPUT_PULLDOWN | MUX_MODE0)	/* lcd_pclk.lcd_pclk */
+			AM33XX_IOPAD(0x8ec, PIN_OUTPUT_PULLDOWN | MUX_MODE0)	/* lcd_ac_bias_en.lcd_ac_bias_en */
+		>;
+	};
+	nxp_hdmi_bonelt_off_pins: nxp_hdmi_bonelt_off_pins {
+		pinctrl-single,pins = <
+			AM33XX_IOPAD(0x9b0, PIN_OUTPUT_PULLDOWN | MUX_MODE3)	/* xdma_event_intr0 */
+		>;
+	};
+};
+
+&lcdc {
+	status = "okay";
+	port {
+		lcdc_0: endpoint@0 {
+			remote-endpoint = <&hdmi_0>;
+		};
+	};
+};
+
+&i2c0 {
+	tda19988: tda19988 {
+		compatible = "nxp,tda998x";
+		reg = <0x70>;
+
+		pinctrl-names = "default", "off";
+		pinctrl-0 = <&nxp_hdmi_bonelt_pins>;
+		pinctrl-1 = <&nxp_hdmi_bonelt_off_pins>;
+
+		ports {
+			port@0 {
+				hdmi_0: endpoint@0 {
+					remote-endpoint = <&lcdc_0>;
+				};
+			};
+		};
+	};
+};
diff --git b/arch/arm/boot/dts/am335x-boneblack-overlay.dts b/arch/arm/boot/dts/am335x-boneblack-overlay.dts
new file mode 100644
index 0000000..e9bbd93
--- /dev/null
+++ b/arch/arm/boot/dts/am335x-boneblack-overlay.dts
@@ -0,0 +1,49 @@
+/*
+ * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+/dts-v1/;
+
+#include "am33xx.dtsi"
+#include "am335x-bone-common.dtsi"
+/* #include <dt-bindings/display/tda998x.h> */
+/* #include "am335x-bone-jtag.dtsi" */
+
+/ {
+	model = "TI AM335x BeagleBone Black";
+	compatible = "ti,am335x-bone-black", "ti,am335x-bone", "ti,am33xx";
+};
+
+&ldo3_reg {
+	regulator-min-microvolt = <1800000>;
+	regulator-max-microvolt = <1800000>;
+	regulator-always-on;
+};
+
+&mmc1 {
+	vmmc-supply = <&vmmcsd_fixed>;
+};
+
+/* EMMC in reset */
+&gpio1 {
+	emmc_rst {
+		gpio-hog;
+		gpios = <20 0>;
+		output-high;
+		line-name = "EMMC ResetN";
+	};
+};
+
+&cpu0_opp_table {
+	/*
+	 * All PG 2.0 silicon may not support 1GHz but some of the early
+	 * BeagleBone Blacks have PG 2.0 silicon which is guaranteed
+	 * to support 1GHz OPP so enable it for PG 2.0 on this board.
+	 */
+	oppnitro@1000000000 {
+		opp-supported-hw = <0x06 0x0100>;
+	};
+};
diff --git b/arch/arm/boot/dts/am335x-boneblack-roboticscape.dts b/arch/arm/boot/dts/am335x-boneblack-roboticscape.dts
new file mode 100644
index 0000000..16e9376
--- /dev/null
+++ b/arch/arm/boot/dts/am335x-boneblack-roboticscape.dts
@@ -0,0 +1,56 @@
+/*
+ * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+ /******************************************************************************
+ * This device tree serves to replace the need for an overlay when using
+ * the RoboticsCape. It is similar to the boneblue tree but preserves
+ * pin config for the black.
+ ******************************************************************************/
+
+
+/dts-v1/;
+
+#include "am33xx.dtsi"
+#include "am335x-bone-common-no-capemgr.dtsi"
+#include "am335x-bone-common-universal-pins.dtsi"
+/* #include "am33xx-pruss-rproc.dtsi" */
+#include "am335x-roboticscape.dtsi"
+
+/ {
+	model = "TI AM335x BeagleBone Black";
+	compatible = "ti,am335x-bone-black", "ti,am335x-bone", "ti,am33xx";
+};
+
+&ldo3_reg {
+	regulator-min-microvolt = <1800000>;
+	regulator-max-microvolt = <1800000>;
+	regulator-always-on;
+};
+
+&mmc1 {
+	vmmc-supply = <&vmmcsd_fixed>;
+};
+
+&mmc2 {
+	vmmc-supply = <&vmmcsd_fixed>;
+	pinctrl-names = "default";
+	pinctrl-0 = <&emmc_pins>;
+	bus-width = <8>;
+	status = "okay";
+};
+
+&cpu0_opp_table {
+	/*
+	 * All PG 2.0 silicon may not support 1GHz but some of the early
+	 * BeagleBone Blacks have PG 2.0 silicon which is guaranteed
+	 * to support 1GHz OPP so enable it for PG 2.0 on this board.
+	 */
+	oppnitro@1000000000 {
+		opp-supported-hw = <0x06 0x0100>;
+	};
+};
diff --git b/arch/arm/boot/dts/am335x-boneblack-uboot.dts b/arch/arm/boot/dts/am335x-boneblack-uboot.dts
new file mode 100644
index 0000000..b2f6e36
--- /dev/null
+++ b/arch/arm/boot/dts/am335x-boneblack-uboot.dts
@@ -0,0 +1,26 @@
+/*
+ * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+/dts-v1/;
+
+#include "am33xx.dtsi"
+#include "am335x-bone-common.dtsi"
+
+/ {
+	model = "TI AM335x BeagleBone Black";
+	compatible = "ti,am335x-bone-black", "ti,am335x-bone", "ti,am33xx";
+};
+
+&ldo3_reg {
+	regulator-min-microvolt = <1800000>;
+	regulator-max-microvolt = <1800000>;
+	regulator-always-on;
+};
+
+&mmc1 {
+	vmmc-supply = <&vmmcsd_fixed>;
+};
diff --git b/arch/arm/boot/dts/am335x-boneblack-wireless-emmc-overlay.dts b/arch/arm/boot/dts/am335x-boneblack-wireless-emmc-overlay.dts
new file mode 100644
index 0000000..78f63bf
--- /dev/null
+++ b/arch/arm/boot/dts/am335x-boneblack-wireless-emmc-overlay.dts
@@ -0,0 +1,45 @@
+/*
+ * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+/dts-v1/;
+
+#include "am33xx.dtsi"
+#include "am335x-bone-common.dtsi"
+/* #include <dt-bindings/display/tda998x.h> */
+#include "am335x-boneblack-wl1835.dtsi"
+/* #include "am335x-bone-jtag.dtsi" */
+
+/ {
+	model = "TI AM335x BeagleBone Black Wireless";
+	compatible = "ti,am335x-bone-black", "ti,am335x-bone", "ti,am33xx";
+};
+
+&ldo3_reg {
+	regulator-min-microvolt = <1800000>;
+	regulator-max-microvolt = <1800000>;
+	regulator-always-on;
+};
+
+&mmc1 {
+	vmmc-supply = <&vmmcsd_fixed>;
+};
+
+&mmc2 {
+	vmmc-supply = <&vmmcsd_fixed>;
+	pinctrl-names = "default";
+	pinctrl-0 = <&emmc_pins>;
+	bus-width = <8>;
+	status = "okay";
+};
+
+&mac {
+	status = "disabled";
+};
+
+&mmc3 {
+	status = "okay";
+};
diff --git b/arch/arm/boot/dts/am335x-boneblack-wireless-roboticscape.dts b/arch/arm/boot/dts/am335x-boneblack-wireless-roboticscape.dts
new file mode 100644
index 0000000..4a72d56
--- /dev/null
+++ b/arch/arm/boot/dts/am335x-boneblack-wireless-roboticscape.dts
@@ -0,0 +1,46 @@
+/*
+ * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+ /******************************************************************************
+ * This device tree serves to replace the need for an overlay when using
+ * the RoboticsCape. It is similar to the boneblue tree but preserves
+ * pin config for the black.
+ ******************************************************************************/
+
+
+/dts-v1/;
+
+#include "am33xx.dtsi"
+#include "am335x-bone-common-no-capemgr.dtsi"
+#include "am335x-bone-common-universal-pins.dtsi"
+/* #include "am33xx-pruss-rproc.dtsi" */
+#include "am335x-boneblack-wl1835.dtsi"
+#include "am335x-roboticscape.dtsi"
+
+/ {
+	model = "TI AM335x BeagleBone Black Wireless";
+	compatible = "ti,am335x-bone-black", "ti,am335x-bone", "ti,am33xx";
+};
+
+&ldo3_reg {
+	regulator-min-microvolt = <1800000>;
+	regulator-max-microvolt = <1800000>;
+	regulator-always-on;
+};
+
+&mmc1 {
+	vmmc-supply = <&vmmcsd_fixed>;
+};
+
+&mmc2 {
+	vmmc-supply = <&vmmcsd_fixed>;
+	pinctrl-names = "default";
+	pinctrl-0 = <&emmc_pins>;
+	bus-width = <8>;
+	status = "okay";
+};
diff --git b/arch/arm/boot/dts/am335x-boneblack-wireless.dts b/arch/arm/boot/dts/am335x-boneblack-wireless.dts
new file mode 100644
index 0000000..9b39648
--- /dev/null
+++ b/arch/arm/boot/dts/am335x-boneblack-wireless.dts
@@ -0,0 +1,165 @@
+/*
+ * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+/dts-v1/;
+
+#include "am33xx.dtsi"
+#include "am335x-bone-common.dtsi"
+#include <dt-bindings/display/tda998x.h>
+#include "am335x-boneblack-wl1835.dtsi"
+/* #include "am335x-bone-jtag.dtsi" */
+
+/ {
+	model = "TI AM335x BeagleBone Black Wireless";
+	compatible = "ti,am335x-bone-black", "ti,am335x-bone", "ti,am33xx";
+};
+
+&ldo3_reg {
+	regulator-min-microvolt = <1800000>;
+	regulator-max-microvolt = <1800000>;
+	regulator-always-on;
+};
+
+&mmc1 {
+	vmmc-supply = <&vmmcsd_fixed>;
+};
+
+&mmc2 {
+	vmmc-supply = <&vmmcsd_fixed>;
+	pinctrl-names = "default";
+	pinctrl-0 = <&emmc_pins>;
+	bus-width = <8>;
+	status = "okay";
+};
+
+&mac {
+	status = "disabled";
+};
+
+&mmc3 {
+	status = "okay";
+};
+
+&am33xx_pinmux {
+	nxp_hdmi_bonelt_pins: nxp_hdmi_bonelt_pins {
+		pinctrl-single,pins = <
+			AM33XX_IOPAD(0x9b0, PIN_OUTPUT_PULLDOWN | MUX_MODE3)	/* xdma_event_intr0 */
+			AM33XX_IOPAD(0x8a0, PIN_OUTPUT | MUX_MODE0)		/* lcd_data0.lcd_data0 */
+			AM33XX_IOPAD(0x8a4, PIN_OUTPUT | MUX_MODE0)		/* lcd_data1.lcd_data1 */
+			AM33XX_IOPAD(0x8a8, PIN_OUTPUT | MUX_MODE0)		/* lcd_data2.lcd_data2 */
+			AM33XX_IOPAD(0x8ac, PIN_OUTPUT | MUX_MODE0)		/* lcd_data3.lcd_data3 */
+			AM33XX_IOPAD(0x8b0, PIN_OUTPUT | MUX_MODE0)		/* lcd_data4.lcd_data4 */
+			AM33XX_IOPAD(0x8b4, PIN_OUTPUT | MUX_MODE0)		/* lcd_data5.lcd_data5 */
+			AM33XX_IOPAD(0x8b8, PIN_OUTPUT | MUX_MODE0)		/* lcd_data6.lcd_data6 */
+			AM33XX_IOPAD(0x8bc, PIN_OUTPUT | MUX_MODE0)		/* lcd_data7.lcd_data7 */
+			AM33XX_IOPAD(0x8c0, PIN_OUTPUT | MUX_MODE0)		/* lcd_data8.lcd_data8 */
+			AM33XX_IOPAD(0x8c4, PIN_OUTPUT | MUX_MODE0)		/* lcd_data9.lcd_data9 */
+			AM33XX_IOPAD(0x8c8, PIN_OUTPUT | MUX_MODE0)		/* lcd_data10.lcd_data10 */
+			AM33XX_IOPAD(0x8cc, PIN_OUTPUT | MUX_MODE0)		/* lcd_data11.lcd_data11 */
+			AM33XX_IOPAD(0x8d0, PIN_OUTPUT | MUX_MODE0)		/* lcd_data12.lcd_data12 */
+			AM33XX_IOPAD(0x8d4, PIN_OUTPUT | MUX_MODE0)		/* lcd_data13.lcd_data13 */
+			AM33XX_IOPAD(0x8d8, PIN_OUTPUT | MUX_MODE0)		/* lcd_data14.lcd_data14 */
+			AM33XX_IOPAD(0x8dc, PIN_OUTPUT | MUX_MODE0)		/* lcd_data15.lcd_data15 */
+			AM33XX_IOPAD(0x8e0, PIN_OUTPUT_PULLDOWN | MUX_MODE0)	/* lcd_vsync.lcd_vsync */
+			AM33XX_IOPAD(0x8e4, PIN_OUTPUT_PULLDOWN | MUX_MODE0)	/* lcd_hsync.lcd_hsync */
+			AM33XX_IOPAD(0x8e8, PIN_OUTPUT_PULLDOWN | MUX_MODE0)	/* lcd_pclk.lcd_pclk */
+			AM33XX_IOPAD(0x8ec, PIN_OUTPUT_PULLDOWN | MUX_MODE0)	/* lcd_ac_bias_en.lcd_ac_bias_en */
+		>;
+	};
+	nxp_hdmi_bonelt_off_pins: nxp_hdmi_bonelt_off_pins {
+		pinctrl-single,pins = <
+			AM33XX_IOPAD(0x9b0, PIN_OUTPUT_PULLDOWN | MUX_MODE3)	/* xdma_event_intr0 */
+		>;
+	};
+
+	mcasp0_pins: mcasp0_pins {
+		pinctrl-single,pins = <
+			AM33XX_IOPAD(0x9ac, PIN_INPUT_PULLUP | MUX_MODE0) /* mcasp0_ahcklx.mcasp0_ahclkx */
+			AM33XX_IOPAD(0x99c, PIN_OUTPUT_PULLDOWN | MUX_MODE2) /* mcasp0_ahclkr.mcasp0_axr2*/
+			AM33XX_IOPAD(0x994, PIN_OUTPUT_PULLUP | MUX_MODE0) /* mcasp0_fsx.mcasp0_fsx */
+			AM33XX_IOPAD(0x990, PIN_OUTPUT_PULLDOWN | MUX_MODE0) /* mcasp0_aclkx.mcasp0_aclkx */
+			AM33XX_IOPAD(0x86c, PIN_OUTPUT_PULLDOWN | MUX_MODE7) /* gpmc_a11.GPIO1_27 */
+		>;
+	};
+};
+
+&lcdc {
+	status = "okay";
+	port {
+		lcdc_0: endpoint@0 {
+			remote-endpoint = <&hdmi_0>;
+		};
+	};
+};
+
+&i2c0 {
+	tda19988: tda19988 {
+		compatible = "nxp,tda998x";
+		reg = <0x70>;
+
+		pinctrl-names = "default", "off";
+		pinctrl-0 = <&nxp_hdmi_bonelt_pins>;
+		pinctrl-1 = <&nxp_hdmi_bonelt_off_pins>;
+
+		#sound-dai-cells = <0>;
+		audio-ports = <	TDA998x_I2S	0x03>;
+
+		ports {
+			port@0 {
+				hdmi_0: endpoint@0 {
+					remote-endpoint = <&lcdc_0>;
+				};
+			};
+		};
+	};
+};
+
+&mcasp0	{
+	#sound-dai-cells = <0>;
+	pinctrl-names = "default";
+	pinctrl-0 = <&mcasp0_pins>;
+	status = "okay";
+	op-mode = <0>;	/* MCASP_IIS_MODE */
+	tdm-slots = <2>;
+	serial-dir = <	/* 0: INACTIVE, 1: TX, 2: RX */
+			0 0 1 0
+		>;
+	tx-num-evt = <32>;
+	rx-num-evt = <32>;
+};
+
+/ {
+	clk_mcasp0_fixed: clk_mcasp0_fixed {
+		#clock-cells = <0>;
+		compatible = "fixed-clock";
+		clock-frequency = <24576000>;
+	};
+
+	clk_mcasp0: clk_mcasp0 {
+		#clock-cells = <0>;
+		compatible = "gpio-gate-clock";
+		clocks = <&clk_mcasp0_fixed>;
+		enable-gpios = <&gpio1 27 0>; /* BeagleBone Black Clk enable on GPIO1_27 */
+	};
+
+	sound {
+		compatible = "simple-audio-card";
+		simple-audio-card,name = "TI BeagleBone Black";
+		simple-audio-card,format = "i2s";
+		simple-audio-card,bitclock-master = <&dailink0_master>;
+		simple-audio-card,frame-master = <&dailink0_master>;
+
+		dailink0_master: simple-audio-card,cpu {
+			sound-dai = <&mcasp0>;
+			clocks = <&clk_mcasp0>;
+		};
+
+		simple-audio-card,codec {
+			sound-dai = <&tda19988>;
+		};
+	};
+};
diff --git b/arch/arm/boot/dts/am335x-boneblack-wl1835.dtsi b/arch/arm/boot/dts/am335x-boneblack-wl1835.dtsi
new file mode 100644
index 0000000..ec6c0e4
--- /dev/null
+++ b/arch/arm/boot/dts/am335x-boneblack-wl1835.dtsi
@@ -0,0 +1,143 @@
+
+#include <dt-bindings/interrupt-controller/irq.h>
+
+/ {
+	wlan_en_reg: fixedregulator@2 {
+		compatible = "regulator-fixed";
+		regulator-name = "wlan-en-regulator";
+		regulator-min-microvolt = <1800000>;
+		regulator-max-microvolt = <1800000>;
+		startup-delay-us= <70000>;
+
+		/* WL_EN */
+		gpio = <&gpio3 9 0>;
+		enable-active-high;
+	};
+
+	leds {
+		pinctrl-names = "default";
+		pinctrl-0 = <&wl18xx_pins>;
+		compatible = "gpio-leds";
+
+		wl18xx_bt_en {
+			label = "wl18xx_bt_en";
+			gpios = <&gpio0 28 GPIO_ACTIVE_HIGH>;
+			default-state = "off";
+		};
+	};
+
+	btwilink {
+		compatible = "btwilink";
+	};
+};
+
+&am33xx_pinmux {
+	wl18xx_pins: pinmux_wl18xx_pins {
+		pinctrl-single,pins = <
+			0x128 (PIN_OUTPUT_PULLUP | MUX_MODE7)	/* (K17) gmii1_txd0.gpio0[28] - BT_EN */
+		>;
+	};
+
+	wlbtbuf_pin: pinmux_wlbtbuf_pin {
+		pinctrl-single,pins = <
+			0x130 ( PIN_OUTPUT_PULLUP | MUX_MODE7 ) /* (L18) gmii1_rxclk.gpio3[10] - LS_BUF_EN */
+		>;
+	};
+
+	mmc3_pins: pinmux_mmc3_pins {
+		pinctrl-single,pins = <
+			0x13c ( PIN_INPUT_PULLUP | MUX_MODE6 ) /* (L15) gmii1_rxd1.mmc2_clk */
+			0x114 ( PIN_INPUT_PULLUP | MUX_MODE6 ) /* (J16) gmii1_txen.mmc2_cmd */
+			0x118 ( PIN_INPUT_PULLUP | MUX_MODE5 ) /* (J17) gmii1_rxdv.mmc2_dat0 */
+			0x11c ( PIN_INPUT_PULLUP | MUX_MODE5 ) /* (J18) gmii1_txd3.mmc2_dat1 */
+			0x120 ( PIN_INPUT_PULLUP | MUX_MODE5 ) /* (K15) gmii1_txd2.mmc2_dat2 */
+			0x108 ( PIN_INPUT_PULLUP | MUX_MODE5 ) /* (H16) gmii1_col.mmc2_dat3 */
+		>;
+	};
+
+	mmc3_pins_sleep: pinmux_mmc3_pins_sleep {
+		pinctrl-single,pins = <
+			0x13c ( PIN_INPUT_PULLDOWN | MUX_MODE6 ) /* (L15) gmii1_rxd1.mmc2_clk */
+			0x114 ( PIN_INPUT_PULLDOWN | MUX_MODE6 ) /* (J16) gmii1_txen.mmc2_cmd */
+			0x118 ( PIN_INPUT_PULLDOWN | MUX_MODE5 ) /* (J17) gmii1_rxdv.mmc2_dat0 */
+			0x11c ( PIN_INPUT_PULLDOWN | MUX_MODE5 ) /* (J18) gmii1_txd3.mmc2_dat1 */
+			0x120 ( PIN_INPUT_PULLDOWN | MUX_MODE5 ) /* (K15) gmii1_txd2.mmc2_dat2 */
+			0x108 ( PIN_INPUT_PULLDOWN | MUX_MODE5 ) /* (H16) gmii1_col.mmc2_dat3 */
+		>;
+	};
+
+	/* wl18xx card enable/irq GPIOs. */
+	wlan_pins: pinmux_wlan_pins {
+		pinctrl-single,pins = <
+			0x12c (PIN_OUTPUT_PULLDOWN | MUX_MODE7 ) /* (K18) gmii1_txclk.gpio3[9] - WL_EN */
+			0x144 (PIN_INPUT_PULLDOWN | MUX_MODE7 )  /* (H18) rmii1_refclk.gpio0[29] - WL_IRQ */
+		>;
+	};
+
+	/* wl18xx card enable/irq GPIOs. */
+	wlan_pins_sleep: pinmux_wlan_pins_sleep {
+		pinctrl-single,pins = <
+			0x12c (PIN_OUTPUT_PULLDOWN | MUX_MODE7 ) /* (K18) gmii1_txclk.gpio3[9] - WL_EN */
+			0x144 (PIN_INPUT_PULLDOWN | MUX_MODE7 )  /* (H18) rmii1_refclk.gpio0[29] - WL_IRQ */
+		>;
+	};
+
+	uart3_pins_default: pinmux_uart3_pins_default {
+		pinctrl-single,pins = <
+			0x134 ( PIN_INPUT_PULLUP | MUX_MODE1 ) 		/* (L17) gmii1_rxd3.uart3_rxd */
+			0x138 ( PIN_OUTPUT_PULLDOWN | MUX_MODE1 )	/* (L16) gmii1_rxd2.uart3_txd */
+			0x148 ( PIN_INPUT | MUX_MODE3 ) 		/* (M17) mdio_data.uart3_ctsn */
+			0x14c ( PIN_OUTPUT_PULLDOWN | MUX_MODE3 ) 	/* (M18) mdio_clk.uart3_rtsn */
+		>;
+	};
+
+	uart3_pins_sleep: pinmux_uart3_pins_sleep {
+		pinctrl-single,pins = <
+			0x134 (PIN_INPUT_PULLDOWN | MUX_MODE7)	/* (L17) gmii1_rxd3.uart3_rxd */
+			0x138 (PIN_INPUT_PULLDOWN | MUX_MODE7)	/* (L16) gmii1_rxd2.uart3_txd */
+			0x148 (PIN_INPUT_PULLDOWN | MUX_MODE7)	/* (M17) mdio_data.uart3_ctsn */
+			0x14c (PIN_INPUT_PULLDOWN | MUX_MODE7)	/* (M18) mdio_clk.uart3_rtsn */
+		>;
+	};
+};
+
+&mmc3 {
+	dmas = <&edma_xbar 12 0 1
+		&edma_xbar 13 0 2>;
+	dma-names = "tx", "rx";
+	status = "okay";
+	vmmc-supply = <&wlan_en_reg>;
+	bus-width = <4>;
+	pinctrl-names = "default", "sleep";
+	pinctrl-0 = <&mmc3_pins &wlan_pins &wlbtbuf_pin>;
+	pinctrl-1 = <&mmc3_pins_sleep &wlan_pins_sleep &wlbtbuf_pin>;
+	ti,non-removable;
+	ti,needs-special-hs-handling;
+	cap-power-off-card;
+	keep-power-in-suspend;
+
+	#address-cells = <1>;
+	#size-cells = <0>;
+	wlcore: wlcore@0 {
+		compatible = "ti,wl1835";
+		reg = <2>;
+		interrupt-parent = <&gpio0>;
+		interrupts = <29 IRQ_TYPE_EDGE_RISING>;
+	};
+};
+
+&uart3 {
+	pinctrl-names = "default", "sleep";
+	pinctrl-0 = <&uart3_pins_default>;
+	pinctrl-1 = <&uart3_pins_sleep>;
+	status = "okay";
+};
+
+&gpio3 {
+	ls_buf_en {
+		gpio-hog;
+		gpios = <10 GPIO_ACTIVE_HIGH>;
+		output-high;
+		line-name = "LS_BUF_EN";
+	};
+};
diff --git b/arch/arm/boot/dts/am335x-boneblack-wl1835mod-cape.dtsi b/arch/arm/boot/dts/am335x-boneblack-wl1835mod-cape.dtsi
new file mode 100644
index 0000000..94caa22
--- /dev/null
+++ b/arch/arm/boot/dts/am335x-boneblack-wl1835mod-cape.dtsi
@@ -0,0 +1,128 @@
+/*
+ * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <dt-bindings/interrupt-controller/irq.h>
+
+/ {
+	wlan_en_reg: fixedregulator@2 {
+		compatible = "regulator-fixed";
+		regulator-name = "wlan-en-regulator";
+		regulator-min-microvolt = <1800000>;
+		regulator-max-microvolt = <1800000>;
+
+		/* WL_EN */
+		gpio = <&gpio0 26 0>;
+		enable-active-high;
+	};
+
+	kim {
+	        compatible = "kim";
+	        nshutdown_gpio = <44>; /* Bank1, pin12 */
+	        dev_name = "/dev/ttyO4";
+	        flow_cntrl = <1>;
+	        baud_rate = <3000000>;
+	};
+
+	btwilink {
+	        compatible = "btwilink";
+	};
+};
+
+&am33xx_pinmux {
+	bt_pins: pinmux_bt_pins {
+		pinctrl-single,pins = <
+			0x30 (PIN_OUTPUT_PULLUP | MUX_MODE7)	/* gpmc_ad12.gpio1_12 */
+		>;
+	};
+
+	mmc2_pins: pinmux_mmc2_pins {
+		pinctrl-single,pins = <
+			0x80 (PIN_INPUT_PULLUP | MUX_MODE2) /* gpmc_csn1.mmc1_clk */
+			0x84 (PIN_INPUT_PULLUP | MUX_MODE2) /* gpmc_csn2.mmc1_cmd */
+			0x00 (PIN_INPUT_PULLUP | MUX_MODE1) /* gpmc_ad0.mmc1_dat0 */
+			0x04 (PIN_INPUT_PULLUP | MUX_MODE1) /* gpmc_ad1.mmc1_dat1 */
+			0x08 (PIN_INPUT_PULLUP | MUX_MODE1) /* gpmc_ad2.mmc1_dat2 */
+			0x0c (PIN_INPUT_PULLUP | MUX_MODE1) /* gpmc_ad3.mmc1_dat3 */
+		>;
+	};
+
+	mmc2_pins_sleep: pinmux_mmc2_pins_sleep {
+		pinctrl-single,pins = <
+			0x80 (PIN_INPUT_PULLDOWN | MUX_MODE7) /* gpmc_csn1.mmc1_clk */
+			0x84 (PIN_INPUT_PULLDOWN | MUX_MODE7) /* gpmc_csn2.mmc1_cmd */
+			0x00 (PIN_INPUT_PULLDOWN | MUX_MODE7) /* gpmc_ad0.mmc1_dat0 */
+			0x04 (PIN_INPUT_PULLDOWN | MUX_MODE7) /* gpmc_ad1.mmc1_dat1 */
+			0x08 (PIN_INPUT_PULLDOWN | MUX_MODE7) /* gpmc_ad2.mmc1_dat2 */
+			0x0c (PIN_INPUT_PULLDOWN | MUX_MODE7) /* gpmc_ad3.mmc1_dat3 */
+		>;
+	};
+
+	/* wl18xx card enable/irq GPIOs. */
+	wlan_pins: pinmux_wlan_pins {
+		pinctrl-single,pins = <
+			0x28 (PIN_OUTPUT_PULLDOWN | MUX_MODE7)	/* gpmc_ad10.gpio0_26 WL_EN*/
+			0x2C (PIN_INPUT_PULLUP | MUX_MODE7)	/* gpmc_ad11.gpio0_27 WL_IRQ*/
+			0x7C (PIN_OUTPUT_PULLUP | MUX_MODE0)	/* gpmc_csn0.gpio1_29 BF_EN*/
+		>;
+	};
+
+	/* wl18xx card enable/irq GPIOs. */
+	wlan_pins_sleep: pinmux_wlan_pins_sleep {
+		pinctrl-single,pins = <
+			0x28 (PIN_OUTPUT_PULLUP | MUX_MODE7)	/* gpmc_ad10.gpio0_26 WL_EN*/
+			0x2C (PIN_INPUT_PULLUP | MUX_MODE7)	/* gpmc_ad11.gpio0_27 WL_IRQ*/
+			0x7C (PIN_OUTPUT_PULLUP | MUX_MODE0)	/* gpmc_csn0.gpio1_29 BF_EN*/
+		>;
+	};
+
+	uart4_pins_default: pinmux_uart4_pins_default {
+		pinctrl-single,pins = <
+			0xD0 (PIN_INPUT | MUX_MODE6)		/* lcd_data12.uart4_cts */
+			0xD4 (PIN_OUTPUT_PULLDOWN | MUX_MODE6)	/* lcd_data13.uart4_rts */
+			0x70 (PIN_INPUT_PULLUP | MUX_MODE6)	/* gpmc_wait0.uart4_rxd */
+			0x74 (PIN_OUTPUT_PULLDOWN | MUX_MODE6)	/* gpmc_wpn.uart4_txd */
+		>;
+	};
+
+	uart4_pins_sleep: pinmux_uart4_pins_sleep {
+		pinctrl-single,pins = <
+			0xD0 (PIN_INPUT_PULLDOWN | MUX_MODE7)	/* lcd_data12.uart4_cts */
+			0xD4 (PIN_INPUT_PULLDOWN | MUX_MODE7)	/* lcd_data13.uart4_rts */
+			0x70 (PIN_INPUT_PULLDOWN | MUX_MODE7)	/* gpmc_wait0.uart4_rxd */
+			0x74 (PIN_INPUT_PULLDOWN | MUX_MODE7)	/* gpmc_wpn.uart4_txd */
+		>;
+	};
+};
+
+&mmc2 {
+	status = "okay";
+	vmmc-supply = <&wlan_en_reg>;
+	bus-width = <4>;
+	pinctrl-names = "default", "sleep";
+	pinctrl-0 = <&mmc2_pins &wlan_pins>;
+	pinctrl-1 = <&mmc2_pins_sleep &wlan_pins_sleep>;
+	ti,non-removable;
+	ti,needs-special-hs-handling;
+	cap-power-off-card;
+	keep-power-in-suspend;
+
+	#address-cells = <1>;
+	#size-cells = <0>;
+	wlcore: wlcore@0 {
+		compatible = "ti,wl1835";
+		reg = <2>;
+		interrupt-parent = <&gpio0>;
+		interrupts = <27 IRQ_TYPE_LEVEL_HIGH>;
+	};
+};
+
+&uart4 {
+	pinctrl-names = "default", "sleep";
+	pinctrl-0 = <&uart4_pins_default>;
+	pinctrl-1 = <&uart4_pins_sleep>;
+	status = "okay";
+};
diff --git b/arch/arm/boot/dts/am335x-boneblack-wl1835mod.dts b/arch/arm/boot/dts/am335x-boneblack-wl1835mod.dts
new file mode 100644
index 0000000..f35b64a
--- /dev/null
+++ b/arch/arm/boot/dts/am335x-boneblack-wl1835mod.dts
@@ -0,0 +1,49 @@
+/*
+ * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+/dts-v1/;
+
+#include "am33xx.dtsi"
+#include "am335x-bone-common-no-capemgr.dtsi"
+
+/ {
+	model = "TI AM335x BeagleBone Black";
+	compatible = "ti,am335x-bone-black", "ti,am335x-bone", "ti,am33xx";
+};
+
+&ldo3_reg {
+	regulator-min-microvolt = <1800000>;
+	regulator-max-microvolt = <1800000>;
+	regulator-always-on;
+};
+
+&mmc1 {
+	vmmc-supply = <&vmmcsd_fixed>;
+};
+
+/* EMMC in reset */
+&gpio1 {
+	emmc_rst {
+		gpio-hog;
+		gpios = <20 0>;
+		output-high;
+		line-name = "EMMC ResetN";
+	};
+};
+
+&cpu0_opp_table {
+	/*
+	 * All PG 2.0 silicon may not support 1GHz but some of the early
+	 * BeagleBone Blacks have PG 2.0 silicon which is guaranteed
+	 * to support 1GHz OPP so enable it for PG 2.0 on this board.
+	 */
+	oppnitro@1000000000 {
+		opp-supported-hw = <0x06 0x0100>;
+	};
+};
+
+#include "am335x-boneblack-wl1835mod-cape.dtsi"
diff --git a/arch/arm/boot/dts/am335x-boneblack.dts b/arch/arm/boot/dts/am335x-boneblack.dts
index 6bbb1fe..8f5d559 100644
--- a/arch/arm/boot/dts/am335x-boneblack.dts
+++ b/arch/arm/boot/dts/am335x-boneblack.dts
@@ -10,6 +10,7 @@
 #include "am33xx.dtsi"
 #include "am335x-bone-common.dtsi"
 #include <dt-bindings/display/tda998x.h>
+/* #include "am335x-bone-jtag.dtsi" */
 
 / {
 	model = "TI AM335x BeagleBone Black";
@@ -34,6 +35,17 @@
 	status = "okay";
 };
 
+&cpu0_opp_table {
+	/*
+	 * All PG 2.0 silicon may not support 1GHz but some of the early
+	 * BeagleBone Blacks have PG 2.0 silicon which is guaranteed
+	 * to support 1GHz OPP so enable it for PG 2.0 on this board.
+	 */
+	oppnitro@1000000000 {
+		opp-supported-hw = <0x06 0x0100>;
+	};
+};
+
 &am33xx_pinmux {
 	nxp_hdmi_bonelt_pins: nxp_hdmi_bonelt_pins {
 		pinctrl-single,pins = <
@@ -108,10 +120,6 @@
 	};
 };
 
-&rtc {
-	system-power-controller;
-};
-
 &mcasp0	{
 	#sound-dai-cells = <0>;
 	pinctrl-names = "default";
diff --git b/arch/arm/boot/dts/am335x-boneblue-wl1835.dtsi b/arch/arm/boot/dts/am335x-boneblue-wl1835.dtsi
new file mode 100644
index 0000000..64731b0
--- /dev/null
+++ b/arch/arm/boot/dts/am335x-boneblue-wl1835.dtsi
@@ -0,0 +1,143 @@
+
+#include <dt-bindings/interrupt-controller/irq.h>
+
+/ {
+	wlan_en_reg: fixedregulator@2 {
+		compatible = "regulator-fixed";
+		regulator-name = "wlan-en-regulator";
+		regulator-min-microvolt = <1800000>;
+		regulator-max-microvolt = <1800000>;
+		startup-delay-us= <70000>;
+
+		/* WL_EN */
+		gpio = <&gpio3 9 0>;
+		enable-active-high;
+	};
+
+	leds {
+		pinctrl-names = "default";
+		pinctrl-0 = <&wl18xx_pins>;
+		compatible = "gpio-leds";
+
+		wl18xx_bt_en {
+			label = "wl18xx_bt_en";
+			gpios = <&gpio0 28 GPIO_ACTIVE_HIGH>;
+			default-state = "off";
+		};
+	};
+
+	btwilink {
+		compatible = "btwilink";
+	};
+};
+
+&am33xx_pinmux {
+	wl18xx_pins: pinmux_wl18xx_pins {
+		pinctrl-single,pins = <
+			0x128 (PIN_OUTPUT_PULLUP | MUX_MODE7)	/* (K17) gmii1_txd0.gpio0[28] - BT_EN */
+		>;
+	};
+
+	wlbtbuf_pin: pinmux_wlbtbuf_pin {
+		pinctrl-single,pins = <
+			0x130 ( PIN_OUTPUT_PULLUP | MUX_MODE7 ) /* (L18) gmii1_rxclk.gpio3[10] - LS_BUF_EN */
+		>;
+	};
+
+	mmc3_pins: pinmux_mmc3_pins {
+		pinctrl-single,pins = <
+			0x13c ( PIN_INPUT_PULLUP | MUX_MODE6 ) /* (L15) gmii1_rxd1.mmc2_clk */
+			0x114 ( PIN_INPUT_PULLUP | MUX_MODE6 ) /* (J16) gmii1_txen.mmc2_cmd */
+			0x118 ( PIN_INPUT_PULLUP | MUX_MODE5 ) /* (J17) gmii1_rxdv.mmc2_dat0 */
+			0x11c ( PIN_INPUT_PULLUP | MUX_MODE5 ) /* (J18) gmii1_txd3.mmc2_dat1 */
+			0x120 ( PIN_INPUT_PULLUP | MUX_MODE5 ) /* (K15) gmii1_txd2.mmc2_dat2 */
+			0x108 ( PIN_INPUT_PULLUP | MUX_MODE5 ) /* (H16) gmii1_col.mmc2_dat3 */
+		>;
+	};
+
+	mmc3_pins_sleep: pinmux_mmc3_pins_sleep {
+		pinctrl-single,pins = <
+			0x13c ( PIN_INPUT_PULLDOWN | MUX_MODE6 ) /* (L15) gmii1_rxd1.mmc2_clk */
+			0x114 ( PIN_INPUT_PULLDOWN | MUX_MODE6 ) /* (J16) gmii1_txen.mmc2_cmd */
+			0x118 ( PIN_INPUT_PULLDOWN | MUX_MODE5 ) /* (J17) gmii1_rxdv.mmc2_dat0 */
+			0x11c ( PIN_INPUT_PULLDOWN | MUX_MODE5 ) /* (J18) gmii1_txd3.mmc2_dat1 */
+			0x120 ( PIN_INPUT_PULLDOWN | MUX_MODE5 ) /* (K15) gmii1_txd2.mmc2_dat2 */
+			0x108 ( PIN_INPUT_PULLDOWN | MUX_MODE5 ) /* (H16) gmii1_col.mmc2_dat3 */
+		>;
+	};
+
+	/* wl18xx card enable/irq GPIOs. */
+	wlan_pins: pinmux_wlan_pins {
+		pinctrl-single,pins = <
+			0x12c (PIN_OUTPUT_PULLDOWN | MUX_MODE7 ) /* (K18) gmii1_txclk.gpio3[9] - WL_EN */
+			0x124 (PIN_INPUT_PULLUP | MUX_MODE7 )    /* (K16) gmii1_txd1.gpio0[21] - WL_IRQ */
+		>;
+	};
+
+	/* wl18xx card enable/irq GPIOs. */
+	wlan_pins_sleep: pinmux_wlan_pins_sleep {
+		pinctrl-single,pins = <
+			0x12c (PIN_OUTPUT_PULLDOWN | MUX_MODE7 ) /* (K18) gmii1_txclk.gpio3[9] - WL_EN */
+			0x124 (PIN_INPUT_PULLUP | MUX_MODE7 )    /* (K16) gmii1_txd1.gpio0[21] - WL_IRQ */
+		>;
+	};
+
+	uart3_pins_default: pinmux_uart3_pins_default {
+		pinctrl-single,pins = <
+			0x134 ( PIN_INPUT_PULLUP | MUX_MODE1 ) 		/* (L17) gmii1_rxd3.uart3_rxd */
+			0x138 ( PIN_OUTPUT_PULLDOWN | MUX_MODE1 )	/* (L16) gmii1_rxd2.uart3_txd */
+			0x148 ( PIN_INPUT | MUX_MODE3 ) 		/* (M17) mdio_data.uart3_ctsn */
+			0x14c ( PIN_OUTPUT_PULLDOWN | MUX_MODE3 ) 	/* (M18) mdio_clk.uart3_rtsn */
+		>;
+	};
+
+	uart3_pins_sleep: pinmux_uart3_pins_sleep {
+		pinctrl-single,pins = <
+			0x134 (PIN_INPUT_PULLDOWN | MUX_MODE7)	/* (L17) gmii1_rxd3.uart3_rxd */
+			0x138 (PIN_INPUT_PULLDOWN | MUX_MODE7)	/* (L16) gmii1_rxd2.uart3_txd */
+			0x148 (PIN_INPUT_PULLDOWN | MUX_MODE7)	/* (M17) mdio_data.uart3_ctsn */
+			0x14c (PIN_INPUT_PULLDOWN | MUX_MODE7)	/* (M18) mdio_clk.uart3_rtsn */
+		>;
+	};
+};
+
+&mmc3 {
+	dmas = <&edma_xbar 12 0 1
+		&edma_xbar 13 0 2>;
+	dma-names = "tx", "rx";
+	status = "okay";
+	vmmc-supply = <&wlan_en_reg>;
+	bus-width = <4>;
+	pinctrl-names = "default", "sleep";
+	pinctrl-0 = <&mmc3_pins &wlan_pins &wlbtbuf_pin>;
+	pinctrl-1 = <&mmc3_pins_sleep &wlan_pins_sleep &wlbtbuf_pin>;
+	ti,non-removable;
+	ti,needs-special-hs-handling;
+	cap-power-off-card;
+	keep-power-in-suspend;
+
+	#address-cells = <1>;
+	#size-cells = <0>;
+	wlcore: wlcore@0 {
+		compatible = "ti,wl1835";
+		reg = <2>;
+		interrupt-parent = <&gpio0>;
+		interrupts = <21 IRQ_TYPE_EDGE_RISING>;
+	};
+};
+
+&uart3 {
+	pinctrl-names = "default", "sleep";
+	pinctrl-0 = <&uart3_pins_default>;
+	pinctrl-1 = <&uart3_pins_sleep>;
+	status = "okay";
+};
+
+&gpio3 {
+	ls_buf_en {
+		gpio-hog;
+		gpios = <10 GPIO_ACTIVE_HIGH>;
+		output-high;
+		line-name = "LS_BUF_EN";
+	};
+};
diff --git b/arch/arm/boot/dts/am335x-boneblue.dts b/arch/arm/boot/dts/am335x-boneblue.dts
new file mode 100644
index 0000000..e2a2fb5
--- /dev/null
+++ b/arch/arm/boot/dts/am335x-boneblue.dts
@@ -0,0 +1,569 @@
+/*
+ * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/dts-v1/;
+
+#include "am33xx.dtsi"
+#include "am335x-bone-common-no-capemgr.dtsi"
+#include "am335x-bone-common-universal-pins.dtsi"
+#include "am335x-boneblue-wl1835.dtsi"
+/* #include "am33xx-pruss-rproc.dtsi" */
+
+#define BLUE_IO(x, y) AM33XX_IOPAD((x)*4+0x800, (y)) /* not used anymore */
+
+
+/ {
+	model = "TI AM335x BeagleBone Blue";
+	compatible = "ti,am335x-bone-blue", "ti,am335x-bone", "ti,am33xx";
+};
+
+&ldo3_reg {
+	regulator-min-microvolt = <1800000>;
+	regulator-max-microvolt = <1800000>;
+	regulator-always-on;
+};
+
+&mmc1 {
+	vmmc-supply = <&vmmcsd_fixed>;
+};
+
+&mmc2 {
+	vmmc-supply = <&vmmcsd_fixed>;
+	pinctrl-names = "default";
+	pinctrl-0 = <&emmc_pins>;
+	bus-width = <8>;
+	status = "okay";
+};
+
+&mac {
+	status = "disabled";
+};
+
+/*******************************************************************************
+* Pin Muxing
+*******************************************************************************/
+&am33xx_pinmux {
+
+	/***************************************************************************
+	* Static Pinmux
+	***************************************************************************/
+	mux_helper_pins: pins {
+		pinctrl-single,pins = <
+
+			/* GPIO Inputs */
+			0x09c 0x37	/*P8.9  Pause BUTTON, input pullup*/
+			0x098 0x37	/*P8.10 MODE BUTTON input pullup*/
+			0x1AC 0x37	/*P9.25 MPU-9150 INTERRUPT IN*/
+
+			/* LEDs GPIO Out*/
+			0x090 0x0F	/* P8.7 R7 LED_RED */
+			0x094 0x0F	/* P8.8 T7 LED_GREEN */
+			0x02C 0x0F	/* P8.17 U12 BATT_LED_1 */
+			0x0DC 0x0F  /* P8.32 T5  BATT_LED_2 diff from cape! */
+			0x07c 0x0F	/* P8.26 V6  BATT_LED_3 */
+			0x028 0x0F	/* P8.14 T11 BATT_LED_4 */
+
+			/* Motor Control GPIO Out*/
+			0x0a8 0x0F	/*P8.43 MDIR_3B*/
+			0x0ac 0x0F	/*P8.44 MDIR_3A*/
+			0x0a0 0x0F	/*P8.45 MDIR_4A*/
+			0x0a4 0x0F	/*P8.46 MDIR_4B*/
+			0x074 0x0F	/*P9.13 MDIR_1B*/
+			0x040 0x0F	/*P9.15 MDIR_2A*/
+			0x1b4 0x0F	/*P9.41 MOT_STBY*/
+			0x088 0x0F	/*T13   MDIR_1A different from cape! */
+			0x0D8 0x0F	/*P8.31 MDIR_2B different from cape! */
+
+			/* HRPWM 1 */
+			0x048  0x6 /* P9_14 | MODE 6 */
+			0x04c  0x6 /* P9_16 | MODE 6 */
+
+			/* HRPWM 2 */
+			0x020  0x4 /* P8_19 | MODE 4 */
+			0x024  0x4 /* P8_13 | MODE 4 */
+
+			/* EQEP */
+			0x1A0 0x31  /* P9_42,EQEP0A, MODE1 */
+			0x1A4 0x31  /* P9_27,EQEP0B, MODE1 */
+			0x0D4 0x32  /* P8_33,EQEP1B, MODE2 */
+			0x0D0 0x32  /* P8_35,EQEP1A, MODE2 */
+			0x030 0x34  /* P8_12,EQEP2A, MODE4 */
+			0x034 0x34  /* P8_11,EQEP2B, MODE4 */
+
+			/* PRU encoder input */
+			0x03c 0x36	/* P8_15,PRU0_r31_15,MODE6 */
+			0x038 0x36	/* P8_16,PRU0_r31_16,MODE6 */
+
+			/* PRU Servo output */
+			0x0e0 0x05	/*pru1_pru_r30_8, MODE5*/
+			0x0e8 0x05	/*pru1_pru_r30_10, MODE5 */
+			0x0e4 0x05	/*pr1_pru1_pru_r30_9, MODE5 */
+			0x0ec 0x05	/*pru1_pru_r30_11, MODE5 */
+			0x0b8 0x05	/*pru1_pru_r30_6, MODE5 */
+			0x0bc 0x05	/*pru1_pru_r30_7, MODE5 */
+			0x0b0 0x05	/*pru1_pru_r30_4, MODE5 */
+			0x0b4 0x05	/*pru1_pru_r30_5, MODE5 */
+			0x0C8 0x0F	/*P8.36, SERVO_PWR GPIO OUT*/
+
+			/* I2C1 */
+			0x15C 0x32	/* P9.17,i2c1_scl,INPUT_PULLUP,MODE2 */
+			0x158 0x32	/* P9.18,i2c1_sda,INPUT_PULLUP,MODE2 */
+
+			/* I2C2 */
+			0x17c  0x73 /* P9.19, i2c2_sda, mode 3 */
+			0x178  0x73 /* P9.20, i2c2_sda, mode 3 */
+
+			/* UART5 */
+			0x0C4 0x34	/* P8.38,uart5_rxd,MODE4 */
+			0x0C0 0x14	/* P8.37,uart5_txd,MODE4 */
+
+			/* WILINK 8 */
+			0x08c 0x0F	/*P8.18 V12 A2DP FSYNC */
+			0x078 0x0F	/*P9.12 A2DP_CLOCK*/
+
+			/* DCAN */
+			0x16c ( PIN_INPUT  | MUX_MODE2 ) /* (E17) uart0_rtsn.dcan1_rx */
+			0x168 ( PIN_OUTPUT | MUX_MODE2 ) /* (E18) uart0_ctsn.dcan1_tx */
+		>;
+
+		/***********************************************************************
+		* New configurable pinmux modes for pins not on Black headers
+		***********************************************************************/
+		/* H18 SPI1_SS1    */
+		H18_default_pin: pinmux_H18_default_pin {
+			pinctrl-single,pins = < 0x144 ( PIN_OUTPUT | MUX_MODE4 ) >; };
+		H18_gpio_pin: pinmux_H18_gpio_pin {
+			pinctrl-single,pins = < 0x144 ( PIN_OUTPUT | MUX_MODE7 ) >; };
+		H18_gpio_pu_pin: pinmux_H18_gpio_pu_pin {
+			pinctrl-single,pins = < 0x144 ( PIN_INPUT_PULLUP | MUX_MODE7 ) >; };
+		H18_gpio_pd_pin: pinmux_H18_gpio_pd_pin {
+			pinctrl-single,pins = < 0x144 ( PIN_INPUT_PULLDOWN | MUX_MODE7 ) >; };
+		H18_spi_pin: pinmux_H18_spi_pin {
+			pinctrl-single,pins = < 0x144 ( PIN_OUTPUT | MUX_MODE2 ) >; };
+
+		/* C18 SPI1_SS2    */
+		C18_default_pin: pinmux_C18_default_pin {
+			pinctrl-single,pins = < 0x164 ( PIN_OUTPUT | MUX_MODE4 ) >; };
+		C18_gpio_pin: pinmux_C18_gpio_pin {
+			pinctrl-single,pins = < 0x164 ( PIN_OUTPUT | MUX_MODE7 ) >; };
+		C18_gpio_pu_pin: pinmux_C18_gpio_pu_pin {
+			pinctrl-single,pins = < 0x164 ( PIN_INPUT_PULLUP | MUX_MODE7 ) >; };
+		C18_gpio_pd_pin: pinmux_C18_gpio_pd_pin {
+			pinctrl-single,pins = < 0x164 ( PIN_INPUT_PULLDOWN | MUX_MODE7 ) >; };
+		C18_spi_pin: pinmux_C18_spi_pin {
+			pinctrl-single,pins = < 0x164 ( PIN_OUTPUT | MUX_MODE2 ) >; };
+
+		/* U16 BLUE_GP0_PIN_3 gpio 1_25   */
+		U16_default_pin: pinmux_U16_default_pin {
+			pinctrl-single,pins = < 0x064 ( PIN_INPUT_PULLUP | MUX_MODE7 ) >; };
+		U16_gpio_pin: pinmux_U16_gpio_pin {
+			pinctrl-single,pins = < 0x064 ( PIN_OUTPUT | MUX_MODE7 ) >; };
+		U16_gpio_pu_pin: pinmux_U16_gpio_pu_pin {
+			pinctrl-single,pins = < 0x064 ( PIN_INPUT_PULLUP | MUX_MODE7 ) >; };
+		U16_gpio_pd_pin: pinmux_U16_gpio_pd_pin {
+			pinctrl-single,pins = < 0x064 ( PIN_INPUT_PULLDOWN | MUX_MODE7 ) >; };
+
+		/* D13 BLUE_GP0_PIN_5 gpio 3_20  */
+		D13_default_pin: pinmux_D13_default_pin {
+			pinctrl-single,pins = < 0x1A8 ( PIN_INPUT_PULLUP | MUX_MODE7 ) >; };
+		D13_gpio_pin: pinmux_D13_gpio_pin {
+			pinctrl-single,pins = < 0x1A8 ( PIN_OUTPUT | MUX_MODE7 ) >; };
+		D13_gpio_pu_pin: pinmux_D13_gpio_pu_pin {
+			pinctrl-single,pins = < 0x1A8 ( PIN_INPUT_PULLUP | MUX_MODE7 ) >; };
+		D13_gpio_pd_pin: pinmux_D13_gpio_pd_pin {
+			pinctrl-single,pins = < 0x1A8 ( PIN_INPUT_PULLDOWN | MUX_MODE7 ) >; };
+
+		/* J15 BLUE_GP1_PIN_3 gpio 3_2  */
+		J15_default_pin: pinmux_J15_default_pin {
+			pinctrl-single,pins = < 0x110 ( PIN_INPUT_PULLUP | MUX_MODE7 ) >; };
+		J15_gpio_pin: pinmux_J15_gpio_pin {
+			pinctrl-single,pins = < 0x110 ( PIN_OUTPUT | MUX_MODE7 ) >; };
+		J15_gpio_pu_pin: pinmux_J15_gpio_pu_pin {
+			pinctrl-single,pins = < 0x110 ( PIN_INPUT_PULLUP | MUX_MODE7 ) >; };
+		J15_gpio_pd_pin: pinmux_J15_gpio_pd_pin {
+			pinctrl-single,pins = < 0x110 ( PIN_INPUT_PULLDOWN | MUX_MODE7 ) >; };
+
+		/* H17 BLUE_GP1_PIN_4 gpio 3_1  */
+		H17_default_pin: pinmux_H17_default_pin {
+			pinctrl-single,pins = < 0x10C ( PIN_INPUT_PULLUP | MUX_MODE7 ) >; };
+		H17_gpio_pin: pinmux_H17_gpio_pin {
+			pinctrl-single,pins = < 0x10C ( PIN_OUTPUT | MUX_MODE7 ) >; };
+		H17_gpio_pu_pin: pinmux_H17_gpio_pu_pin {
+			pinctrl-single,pins = < 0x10C ( PIN_INPUT_PULLUP | MUX_MODE7 ) >; };
+		H17_gpio_pd_pin: pinmux_H17_gpio_pd_pin {
+			pinctrl-single,pins = < 0x10C ( PIN_INPUT_PULLDOWN | MUX_MODE7 ) >; };
+	};
+
+};
+
+
+/*******************************************************************************
+* apply static and dynamic pinmux modes listed above. Pins shared with the black
+* header pins get the modes from am335x-boneblack-common-universal-pins.dtsi
+*******************************************************************************/
+&ocp {
+	/* activate the static pinmux helper list of pin modes above */
+	test_helper: helper {
+		compatible = "bone-pinmux-helper";
+		pinctrl-names = "default";
+		pinctrl-0 = <&mux_helper_pins>;
+
+		status = "okay";
+	};
+
+	/* UART4 RX DSM */
+	P9_11_pinmux {
+		compatible = "bone-pinmux-helper";
+		status = "okay";
+		pinctrl-names = "default", "gpio", "gpio_pu", "gpio_pd", "uart";
+		pinctrl-0 = <&P9_11_default_pin>;
+		pinctrl-1 = <&P9_11_gpio_pin>;
+		pinctrl-2 = <&P9_11_gpio_pu_pin>;
+		pinctrl-3 = <&P9_11_gpio_pd_pin>;
+		pinctrl-4 = <&P9_11_uart_pin>;
+	};
+
+	/* UART 2 TX GPS*/
+	P9_21_pinmux {
+        compatible = "bone-pinmux-helper";
+        status = "okay";
+        pinctrl-names = "default", "gpio", "gpio_pu", "gpio_pd", "spi", "uart", "i2c", "pwm";
+        pinctrl-0 = <&P9_21_default_pin>;
+        pinctrl-1 = <&P9_21_gpio_pin>;
+        pinctrl-2 = <&P9_21_gpio_pu_pin>;
+        pinctrl-3 = <&P9_21_gpio_pd_pin>;
+        pinctrl-4 = <&P9_21_spi_pin>;
+        pinctrl-5 = <&P9_21_uart_pin>;
+        pinctrl-6 = <&P9_21_i2c_pin>;
+        pinctrl-7 = <&P9_21_pwm_pin>;
+    };
+
+    /* UART 2 RX GPS */
+    P9_22_pinmux {
+        compatible = "bone-pinmux-helper";
+        status = "okay";
+        pinctrl-names = "default", "gpio", "gpio_pu", "gpio_pd", "spi", "uart", "i2c", "pwm";
+        pinctrl-0 = <&P9_22_default_pin>;
+        pinctrl-1 = <&P9_22_gpio_pin>;
+        pinctrl-2 = <&P9_22_gpio_pu_pin>;
+        pinctrl-3 = <&P9_22_gpio_pd_pin>;
+        pinctrl-4 = <&P9_22_spi_pin>;
+        pinctrl-5 = <&P9_22_uart_pin>;
+        pinctrl-6 = <&P9_22_i2c_pin>;
+        pinctrl-7 = <&P9_22_pwm_pin>;
+    };
+
+    /* SPI MISO */
+    P9_29_pinmux {
+        compatible = "bone-pinmux-helper";
+        status = "okay";
+        pinctrl-names = "default", "gpio", "gpio_pu", "gpio_pd", "pwm", "spi", "pruout", "pruin";
+        pinctrl-0 = <&P9_29_default_pin>;
+        pinctrl-1 = <&P9_29_gpio_pin>;
+        pinctrl-2 = <&P9_29_gpio_pu_pin>;
+        pinctrl-3 = <&P9_29_gpio_pd_pin>;
+        pinctrl-4 = <&P9_29_pwm_pin>;
+        pinctrl-5 = <&P9_29_spi_pin>;
+        pinctrl-6 = <&P9_29_pruout_pin>;
+        pinctrl-7 = <&P9_29_pruin_pin>;
+    };
+
+    /* SPI MOSI */
+    P9_30_pinmux {
+        compatible = "bone-pinmux-helper";
+        status = "okay";
+        pinctrl-names = "default", "gpio", "gpio_pu", "gpio_pd", "pwm", "spi", "pruout", "pruin";
+        pinctrl-0 = <&P9_30_default_pin>;
+        pinctrl-1 = <&P9_30_gpio_pin>;
+        pinctrl-2 = <&P9_30_gpio_pu_pin>;
+        pinctrl-3 = <&P9_30_gpio_pd_pin>;
+        pinctrl-4 = <&P9_30_pwm_pin>;
+        pinctrl-5 = <&P9_30_spi_pin>;
+        pinctrl-6 = <&P9_30_pruout_pin>;
+        pinctrl-7 = <&P9_30_pruin_pin>;
+    };
+
+    /* SPI SCK */
+    P9_31_pinmux {
+        compatible = "bone-pinmux-helper";
+        status = "okay";
+        pinctrl-names = "default", "gpio", "gpio_pu", "gpio_pd", "pwm", "spi", "pruout", "pruin";
+        pinctrl-0 = <&P9_31_default_pin>;
+        pinctrl-1 = <&P9_31_gpio_pin>;
+        pinctrl-2 = <&P9_31_gpio_pu_pin>;
+        pinctrl-3 = <&P9_31_gpio_pd_pin>;
+        pinctrl-4 = <&P9_31_pwm_pin>;
+        pinctrl-5 = <&P9_31_spi_pin>;
+        pinctrl-6 = <&P9_31_pruout_pin>;
+        pinctrl-7 = <&P9_31_pruin_pin>;
+    };
+
+     /* SPI SS1 */
+    H18_pinmux {
+        compatible = "bone-pinmux-helper";
+        status = "okay";
+        pinctrl-names = "default", "gpio", "gpio_pu", "gpio_pd", "spi";
+        pinctrl-0 = <&H18_default_pin>;
+        pinctrl-1 = <&H18_gpio_pin>;
+        pinctrl-2 = <&H18_gpio_pu_pin>;
+        pinctrl-3 = <&H18_gpio_pd_pin>;
+        pinctrl-4 = <&H18_spi_pin>;
+    };
+
+    /* SPI SS2 */
+    C18_pinmux {
+        compatible = "bone-pinmux-helper";
+        status = "okay";
+        pinctrl-names = "default", "gpio", "gpio_pu", "gpio_pd", "spi";
+        pinctrl-0 = <&C18_default_pin>;
+        pinctrl-1 = <&C18_gpio_pin>;
+        pinctrl-2 = <&C18_gpio_pu_pin>;
+        pinctrl-3 = <&C18_gpio_pd_pin>;
+        pinctrl-4 = <&C18_spi_pin>;
+    };
+
+    /* UART 1 TX */
+     P9_24_pinmux {
+        compatible = "bone-pinmux-helper";
+        status = "okay";
+        pinctrl-names = "default", "gpio", "gpio_pu", "gpio_pd", "uart", "can", "i2c",  "pruin";
+        pinctrl-0 = <&P9_24_default_pin>;
+        pinctrl-1 = <&P9_24_gpio_pin>;
+        pinctrl-2 = <&P9_24_gpio_pu_pin>;
+        pinctrl-3 = <&P9_24_gpio_pd_pin>;
+        pinctrl-4 = <&P9_24_uart_pin>;
+        pinctrl-5 = <&P9_24_can_pin>;
+        pinctrl-6 = <&P9_24_i2c_pin>;
+        pinctrl-7 = <&P9_24_pruin_pin>;
+    };
+
+    /* UART 1 RX */
+    P9_26_pinmux {
+        compatible = "bone-pinmux-helper";
+        status = "okay";
+        pinctrl-names = "default", "gpio", "gpio_pu", "gpio_pd", "uart", "can", "i2c",  "pruin";
+        pinctrl-0 = <&P9_26_default_pin>;
+        pinctrl-1 = <&P9_26_gpio_pin>;
+        pinctrl-2 = <&P9_26_gpio_pu_pin>;
+        pinctrl-3 = <&P9_26_gpio_pd_pin>;
+        pinctrl-4 = <&P9_26_uart_pin>;
+        pinctrl-5 = <&P9_26_can_pin>;
+        pinctrl-6 = <&P9_26_i2c_pin>;
+        pinctrl-7 = <&P9_26_pruin_pin>;
+    };
+
+    /* U16 BLUE_GP0_PIN_3 gpio 1_25*/
+    U16_pinmux {
+        compatible = "bone-pinmux-helper";
+        status = "okay";
+        pinctrl-names = "default", "gpio", "gpio_pu", "gpio_pd";
+        pinctrl-0 = <&U16_default_pin>;
+        pinctrl-1 = <&U16_gpio_pin>;
+        pinctrl-2 = <&U16_gpio_pu_pin>;
+        pinctrl-3 = <&U16_gpio_pd_pin>;
+    };
+
+
+    /* BLUE_GP0_PIN_3 gpio1_17*/
+    P9_23_pinmux {
+        compatible = "bone-pinmux-helper";
+        status = "okay";
+        pinctrl-names = "default", "gpio", "gpio_pu", "gpio_pd", "pwm";
+        pinctrl-0 = <&P9_23_default_pin>;
+        pinctrl-1 = <&P9_23_gpio_pin>;
+        pinctrl-2 = <&P9_23_gpio_pu_pin>;
+        pinctrl-3 = <&P9_23_gpio_pd_pin>;
+        pinctrl-4 = <&P9_23_pwm_pin>;
+    };
+
+    /* BLUE_GP0_PIN_5 gpio3_20 */
+    D13_pinmux {
+        compatible = "bone-pinmux-helper";
+        status = "okay";
+        pinctrl-names = "default", "gpio", "gpio_pu", "gpio_pd";
+        pinctrl-0 = <&D13_default_pin>;
+        pinctrl-1 = <&D13_gpio_pin>;
+        pinctrl-2 = <&D13_gpio_pu_pin>;
+        pinctrl-3 = <&D13_gpio_pd_pin>;
+    };
+
+    /* BLUE_GP0_PIN_6 gpio3_17 */
+    P9_28_pinmux {
+        compatible = "bone-pinmux-helper";
+        status = "okay";
+        pinctrl-names = "default", "gpio", "gpio_pu", "gpio_pd", "pwm", "spi", "pwm2", "pruout", "pruin";
+        pinctrl-0 = <&P9_28_default_pin>;
+        pinctrl-1 = <&P9_28_gpio_pin>;
+        pinctrl-2 = <&P9_28_gpio_pu_pin>;
+        pinctrl-3 = <&P9_28_gpio_pd_pin>;
+        pinctrl-4 = <&P9_28_pwm_pin>;
+        pinctrl-5 = <&P9_28_spi_pin>;
+        pinctrl-6 = <&P9_28_pwm2_pin>;
+        pinctrl-7 = <&P9_28_pruout_pin>;
+        pinctrl-8 = <&P9_28_pruin_pin>;
+    };
+
+    /* BLUE_GP1_PIN_3 gpio3_2 */
+    J15_pinmux {
+        compatible = "bone-pinmux-helper";
+        status = "okay";
+        pinctrl-names = "default", "gpio", "gpio_pu", "gpio_pd";
+        pinctrl-0 = <&J15_default_pin>;
+        pinctrl-1 = <&J15_gpio_pin>;
+        pinctrl-2 = <&J15_gpio_pu_pin>;
+        pinctrl-3 = <&J15_gpio_pd_pin>;
+    };
+
+    /* BLUE_GP1_PIN_4 gpio3_1 */
+    H17_pinmux {
+        compatible = "bone-pinmux-helper";
+        status = "okay";
+        pinctrl-names = "default", "gpio", "gpio_pu", "gpio_pd";
+        pinctrl-0 = <&H17_default_pin>;
+        pinctrl-1 = <&H17_gpio_pin>;
+        pinctrl-2 = <&H17_gpio_pu_pin>;
+        pinctrl-3 = <&H17_gpio_pd_pin>;
+    };
+
+
+
+};
+
+
+/*******************************************************************************
+*	PWMSS
+*******************************************************************************/
+&epwmss0 {
+	status = "okay";
+};
+
+&epwmss1 {
+	status = "okay";
+};
+
+&epwmss2 {
+	status = "okay";
+};
+
+&ehrpwm0 {
+	status = "okay";
+};
+
+&ehrpwm1 {
+	status = "okay";
+};
+
+&ehrpwm2 {
+	status = "okay";
+};
+
+
+/*******************************************************************************
+* EQEP
+*******************************************************************************/
+&eqep0 {
+	count_mode = <0>;  /* 0 - Quadrature mode, normal 90 phase offset cha & chb.  1 - Direction mode.  cha input = clock, chb input = direction */
+	swap_inputs = <0>; /* Are channel A and channel B swapped? (0 - no, 1 - yes) */
+	invert_qa = <1>;   /* Should we invert the channel A input?  */
+	invert_qb = <1>;   /* Should we invert the channel B input? I invert these because my encoder outputs drive transistors that pull down the pins */
+	invert_qi = <0>;   /* Should we invert the index input? */
+	invert_qs = <0>;   /* Should we invert the strobe input? */
+	status = "okay";
+};
+
+&eqep1 {
+	count_mode = <0>;  /* 0 - Quadrature mode, normal 90 phase offset cha & chb.  1 - Direction mode.  cha input = clock, chb input = direction */
+	swap_inputs = <0>; /* Are channel A and channel B swapped? (0 - no, 1 - yes) */
+	invert_qa = <1>;   /* Should we invert the channel A input?  */
+	invert_qb = <1>;   /* Should we invert the channel B input? I invert these because my encoder outputs drive transistors that pull down the pins */
+	invert_qi = <0>;   /* Should we invert the index input? */
+	invert_qs = <0>;   /* Should we invert the strobe input? */
+	status = "okay";
+};
+
+&eqep2 {
+	count_mode = <0>;  /* 0 - Quadrature mode, normal 90 phase offset cha & chb.  1 - Direction mode.  cha input = clock, chb input = direction */
+	swap_inputs = <0>; /* Are channel A and channel B swapped? (0 - no, 1 - yes) */
+	invert_qa = <1>;   /* Should we invert the channel A input?  */
+	invert_qb = <1>;   /* Should we invert the channel B input? I invert these because my encoder outputs drive transistors that pull down the pins */
+	invert_qi = <0>;   /* Should we invert the index input? */
+	invert_qs = <0>;   /* Should we invert the strobe input? */
+	status = "okay";
+};
+
+
+/*******************************************************************************
+	UART
+*******************************************************************************/
+
+&uart1 {
+	status = "okay";
+};
+
+&uart2 {
+	status = "okay";
+};
+
+&uart4 {
+	status = "okay";
+};
+
+&uart5 {
+	status = "okay";
+};
+
+
+/*******************************************************************************
+	PRU
+*******************************************************************************/
+&pruss {
+	status = "okay";
+};
+
+
+/*******************************************************************************
+	I2C
+*******************************************************************************/
+&i2c1 {
+	#address-cells = <1>;
+	#size-cells = <0>;
+	status = "okay";
+	clock-frequency = <400000>;
+};
+
+&i2c2 {
+	#address-cells = <1>;
+	#size-cells = <0>;
+	status = "okay";
+	clock-frequency = <400000>;
+};
+
+/*******************************************************************************
+	SPI
+*******************************************************************************/
+&spi1 {
+	#address-cells = <1>;
+	#size-cells = <0>;
+	status = "okay";
+
+	channel@0 {
+		#address-cells = <1>;
+		#size-cells = <0>;
+		compatible = "spidev";
+		reg = <0>;
+		spi-max-frequency = <24000000>;
+	};
+
+	channel@1 {
+		#address-cells = <1>;
+		#size-cells = <0>;
+		compatible = "spidev";
+		reg = <1>;
+		spi-max-frequency = <24000000>;
+	};
+};
diff --git b/arch/arm/boot/dts/am335x-bonegreen-overlay.dts b/arch/arm/boot/dts/am335x-bonegreen-overlay.dts
new file mode 100644
index 0000000..a703fcd
--- /dev/null
+++ b/arch/arm/boot/dts/am335x-bonegreen-overlay.dts
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+/dts-v1/;
+
+#include "am33xx.dtsi"
+#include "am335x-bone-common.dtsi"
+
+/ {
+	model = "TI AM335x BeagleBone Green";
+	compatible = "ti,am335x-bone-green", "ti,am335x-bone-black", "ti,am335x-bone", "ti,am33xx";
+};
+
+&ldo3_reg {
+	regulator-min-microvolt = <1800000>;
+	regulator-max-microvolt = <1800000>;
+	regulator-always-on;
+};
+
+&mmc1 {
+	vmmc-supply = <&vmmcsd_fixed>;
+};
+
+/* EMMC in reset */
+&gpio1 {
+	emmc_rst {
+		gpio-hog;
+		gpios = <20 0>;
+		output-high;
+		line-name = "EMMC ResetN";
+	};
+};
diff --git b/arch/arm/boot/dts/am335x-bonegreen-wireless.dts b/arch/arm/boot/dts/am335x-bonegreen-wireless.dts
new file mode 100644
index 0000000..f37f39e
--- /dev/null
+++ b/arch/arm/boot/dts/am335x-bonegreen-wireless.dts
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+/dts-v1/;
+
+#include "am33xx.dtsi"
+#include "am335x-bone-common.dtsi"
+#include "am335x-bonegreen-wl1835.dtsi"
+/* #include "am335x-bone-jtag.dtsi" */
+
+/ {
+	model = "TI AM335x BeagleBone Green Wireless";
+	compatible = "ti,am335x-bone-green", "ti,am335x-bone-black", "ti,am335x-bone", "ti,am33xx";
+};
+
+&ldo3_reg {
+	regulator-min-microvolt = <1800000>;
+	regulator-max-microvolt = <1800000>;
+	regulator-always-on;
+};
+
+&mmc1 {
+	vmmc-supply = <&vmmcsd_fixed>;
+};
+
+&mmc2 {
+	vmmc-supply = <&vmmcsd_fixed>;
+	pinctrl-names = "default";
+	pinctrl-0 = <&emmc_pins>;
+	bus-width = <8>;
+	status = "okay";
+};
+
+&uart3 {
+	status = "okay";
+};
+
+&mmc3 {
+	status = "okay";
+};
+
+&mac {
+	status = "disabled";
+};
diff --git b/arch/arm/boot/dts/am335x-bonegreen-wl1835.dtsi b/arch/arm/boot/dts/am335x-bonegreen-wl1835.dtsi
new file mode 100644
index 0000000..0d4798d
--- /dev/null
+++ b/arch/arm/boot/dts/am335x-bonegreen-wl1835.dtsi
@@ -0,0 +1,147 @@
+#include <dt-bindings/interrupt-controller/irq.h>
+
+/ {
+	wlan_en_reg: fixedregulator@2 {
+		compatible = "regulator-fixed";
+		regulator-name = "wlan-en-regulator";
+		regulator-min-microvolt = <1800000>;
+		regulator-max-microvolt = <1800000>;
+		startup-delay-us= <70000>;
+
+		/* WL_EN */
+		gpio = <&gpio0 26 0>;
+		enable-active-high;
+	};
+
+	btwilink {
+		compatible = "btwilink";
+	};
+
+	wilink8_pcm: wilink8_pcm {
+		compatible = "ti,wilink8_bt";
+		status = "okay";
+	};
+
+	sound{
+		compatible = "ti,wilink8-bt-audio";
+		ti,model = "WILINK8_BT";
+		ti,audio-codec = <&wilink8_pcm>;
+		ti,mcasp-controller = <&mcasp0>;
+		ti,codec-clock-rate = <24000000>;
+	};
+};
+
+&am33xx_pinmux {
+	bt_pins: pinmux_bt_pins {
+		pinctrl-single,pins = <
+			0x78 (PIN_OUTPUT | MUX_MODE7)	/* gpmc_ad12.gpio1_28 BT_EN*/
+		>;
+	};
+
+	mmc3_pins: pinmux_mmc3_pins {
+		pinctrl-single,pins = <
+			0x8c ( PIN_INPUT_PULLUP | MUX_MODE3 ) /* gpio2_1 gpmc_clk.mmc2_clk */
+			0x88 ( PIN_INPUT_PULLUP | MUX_MODE3) /* gpio2_0 gpmc_csn3.mmc2_cmd */
+			0x30 ( PIN_INPUT_PULLUP | MUX_MODE3 ) /* gpio1_12 gpmc_ad12.mmc2_dat0 */
+			0x34 ( PIN_INPUT_PULLUP | MUX_MODE3 ) /* gpio1_13 gpmc_ad13.mmc2_dat1 */
+			0x38 ( PIN_INPUT_PULLUP | MUX_MODE3 ) /* gpio1_14 gpmc_ad14.mmc2_dat2 */
+			0x3c ( PIN_INPUT_PULLUP | MUX_MODE3 ) /* gpio1_15 gpmc_ad15.mmc2_dat3 */
+		>;
+	};
+
+	mmc3_pins_sleep: pinmux_mmc3_pins_sleep {
+		pinctrl-single,pins = <
+			0x8c (PIN_INPUT_PULLDOWN | MUX_MODE7) /* gpio2_1 gpmc_clk.mmc2_clk */
+			0x88 (PIN_INPUT_PULLDOWN | MUX_MODE7) /* gpio2_0 gpmc_csn3.mmc2_cmd */
+			0x30 (PIN_INPUT_PULLDOWN | MUX_MODE7) /* gpio1_12 gpmc_ad12.mmc2_dat0 */
+			0x34 (PIN_INPUT_PULLDOWN | MUX_MODE7) /* gpio1_13 gpmc_ad13.mmc2_dat1 */
+			0x38 (PIN_INPUT_PULLDOWN | MUX_MODE7) /* gpio1_14 gpmc_ad14.mmc2_dat2 */
+			0x3c (PIN_INPUT_PULLDOWN | MUX_MODE7) /* gpio1_15 gpmc_ad15.mmc2_dat3 */
+		>;
+	};
+
+	/* wl18xx card enable/irq GPIOs. */
+	wlan_pins: pinmux_wlan_pins {
+		pinctrl-single,pins = <
+			0x28 (PIN_OUTPUT_PULLDOWN| MUX_MODE7)	/* gpmc_ad10.gpio0_26 WL_EN*/
+			0x2C (PIN_INPUT_PULLDOWN | MUX_MODE7)	/* gpmc_ad11.gpio0_27 WL_IRQ*/
+			0x7C (PIN_OUTPUT_PULLUP | MUX_MODE0)	/* gpmc_csn0.gpio1_29 Cape_Buffer_EN*/
+		>;
+	};
+
+	/* wl18xx card enable/irq GPIOs. */
+	wlan_pins_sleep: pinmux_wlan_pins_sleep {
+		pinctrl-single,pins = <
+			0x28 (PIN_OUTPUT_PULLDOWN | MUX_MODE7)	/* gpmc_ad10.gpio0_26 WL_EN*/
+			0x2C (PIN_INPUT_PULLDOWN | MUX_MODE7)	/* gpmc_ad11.gpio0_27 WL_IRQ*/
+			0x7C (PIN_OUTPUT_PULLUP | MUX_MODE0)	/* gpmc_csn0.gpio1_29 Cape_Buffer_EN*/
+		>;
+	};
+
+	uart3_pins_default: pinmux_uart3_pins_default {
+		pinctrl-single,pins = <
+			0x134 ( PIN_INPUT_PULLUP | MUX_MODE1 ) 		/* (L17) gmii1_rxd3.uart3_rxd */
+			0x138 ( PIN_OUTPUT_PULLDOWN | MUX_MODE1 )	/* (L16) gmii1_rxd2.uart3_txd */
+			0x148 ( PIN_INPUT | MUX_MODE3 ) 			/* (M17) mdio_data.uart3_ctsn */
+			0x14c ( PIN_OUTPUT_PULLDOWN | MUX_MODE3 ) 	/* (M18) mdio_clk.uart3_rtsn */
+		>;
+	};
+
+	uart3_pins_sleep: pinmux_uart3_pins_sleep {
+		pinctrl-single,pins = <
+			0x134 (PIN_INPUT_PULLDOWN | MUX_MODE7)	/* (L17) gmii1_rxd3.uart3_rxd */
+			0x138 (PIN_INPUT_PULLDOWN | MUX_MODE7)	/* (L16) gmii1_rxd2.uart3_txd */
+			0x148 (PIN_INPUT_PULLDOWN | MUX_MODE7)	/* (M17) mdio_data.uart3_ctsn */
+			0x14c (PIN_INPUT_PULLDOWN | MUX_MODE7)	/* (M18) mdio_clk.uart3_rtsn */
+		>;
+	};
+};
+
+&mmc3 {
+	dmas = <&edma_xbar 12 0 1
+		&edma_xbar 13 0 2>;
+	dma-names = "tx", "rx";
+	status = "okay";
+	vmmc-supply = <&wlan_en_reg>;
+	bus-width = <4>;
+	pinctrl-names = "default", "sleep";
+	pinctrl-0 = <&mmc3_pins &wlan_pins>;
+	pinctrl-1 = <&mmc3_pins_sleep &wlan_pins_sleep>;
+	ti,non-removable;
+	ti,needs-special-hs-handling;
+	cap-power-off-card;
+	keep-power-in-suspend;
+
+	#address-cells = <1>;
+	#size-cells = <0>;
+	wlcore: wlcore@0 {
+		compatible = "ti,wl1835";
+		reg = <2>;
+		interrupt-parent = <&gpio0>;
+		interrupts = <27 IRQ_TYPE_EDGE_RISING>;
+	};
+};
+
+&uart3 {
+	pinctrl-names = "default", "sleep";
+	pinctrl-0 = <&uart3_pins_default &bt_pins>;
+	pinctrl-1 = <&uart3_pins_sleep &bt_pins>;
+	status = "okay";
+};
+
+/* BT_AUD_OUT from wl1835 has to be pulled low when WL_EN is activated.	*/
+/* in case it isn't, wilink8 ends up in one of the test modes that      */
+/* intruces various issues (elp wkaeup timeouts etc.)			*/
+/* On the BBGW this pin is routed through the level shifter (U21) that	*/
+/* introduces a pullup on the line and wilink8 ends up in a bad state.  */
+/* use a gpio hog to force this pin low. An alternative may be adding 	*/
+/* an external pulldown on U21 pin 4.					*/
+
+&gpio3 {
+	bt_aud_in {
+		gpio-hog;
+		gpios = <16 GPIO_ACTIVE_HIGH>;
+		output-low;
+		line-name = "MCASP0_AXR0";
+	};
+};
diff --git a/arch/arm/boot/dts/am335x-bonegreen.dts b/arch/arm/boot/dts/am335x-bonegreen.dts
index dce3c86..647d4bf 100644
--- a/arch/arm/boot/dts/am335x-bonegreen.dts
+++ b/arch/arm/boot/dts/am335x-bonegreen.dts
@@ -9,6 +9,7 @@
 
 #include "am33xx.dtsi"
 #include "am335x-bone-common.dtsi"
+/* #include "am335x-bone-jtag.dtsi" */
 
 / {
 	model = "TI AM335x BeagleBone Green";
@@ -32,22 +33,3 @@
 	bus-width = <8>;
 	status = "okay";
 };
-
-&am33xx_pinmux {
-	uart2_pins: uart2_pins {
-		pinctrl-single,pins = <
-			AM33XX_IOPAD(0x950, PIN_INPUT | MUX_MODE1)	/* spi0_sclk.uart2_rxd */
-			AM33XX_IOPAD(0x954, PIN_OUTPUT | MUX_MODE1)	/* spi0_d0.uart2_txd */
-		>;
-	};
-};
-
-&uart2 {
-	pinctrl-names = "default";
-	pinctrl-0 = <&uart2_pins>;
-	status = "okay";
-};
-
-&rtc {
-	system-power-controller;
-};
diff --git b/arch/arm/boot/dts/am335x-cape-bbb-exp-c.dtsi b/arch/arm/boot/dts/am335x-cape-bbb-exp-c.dtsi
new file mode 100644
index 0000000..01f9cde
--- /dev/null
+++ b/arch/arm/boot/dts/am335x-cape-bbb-exp-c.dtsi
@@ -0,0 +1,224 @@
+/*
+ * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <dt-bindings/board/am335x-bbw-bbb-base.h>
+
+#include "am335x-peripheral-can0.dtsi"
+#include "am335x-bone-pinmux-can0.dtsi"
+
+#include "am335x-peripheral-ttyS1.dtsi"
+#include "am335x-bone-pinmux-ttyS1.dtsi"
+
+#include "am335x-peripheral-ttyS2.dtsi"
+#include "am335x-bone-pinmux-ttyS2.dtsi"
+
+#include "am335x-peripheral-ttyS4.dtsi"
+#include "am335x-bone-pinmux-ttyS4.dtsi"
+
+&am33xx_pinmux {
+	user_leds_s1: user_leds_s1 {
+		pinctrl-single,pins = <
+			0x98 0x7	/* gpmc_wen.gpio2_4, OUTPUT | MODE7 */
+			0x9c 0x7	/* gpmc_ben0_cle.gpio2_5, OUTPUT | MODE7 */
+		>;
+	};
+
+	bb_lcd_pwm_backlight_pins: pinmux_bb_lcd_pwm_backlight_pins {
+		pinctrl-single,pins = <
+			BONE_P9_14 (PIN_OUTPUT_PULLDOWN | MUX_MODE6) /* gpmc_a2.ehrpwm1a */
+		>;
+	};
+
+	keymap3_pins: pinmux_keymap3_pins {
+		pinctrl-single,pins = <
+			0x040 0x2f	/* KEY_UP gpmc_a0.gpio1_16, INPUT | PULLDIS | MODE7 */
+			0x04c 0x2f	/* KEY_DOWN gpmc_a3.gpio1_19, INPUT | PULLDIS | MODE7 */
+			0x078 0x2f	/* KEY_RIGHT gpmc_ben1.gpio1_28, INPUT | PULLDIS | MODE7 */
+			0x164 0x2f	/* KEY_LEFT ecap0_in_pwm0_out.gpio0_7, INPUT | PULLDIS | MODE7 */
+			0x1a4 0x2f	/* KEY_ENTER mcasp0_fxr.gpio3_19, INPUT | PULLDIS | MODE7 */
+		>;
+	};
+
+	edt_ft5306_ts_pins: pinmux_edt_ft5306_ts_pins {
+		pinctrl-single,pins = <
+			/* CAP_TSC gpmc_a1.gpio1_17, INPUT | MODE7 */
+			BONE_P9_23 (PIN_INPUT_PULLDOWN | MUX_MODE7)
+		>;
+	};
+
+	i2c1_pins: pinmux_i2c1_pins {
+		pinctrl-single,pins = <
+			/* spi0_d1.i2c1_sda, SLEWCTRL_SLOW | INPUT_PULLUP | MODE2 */
+			BONE_P9_18 (PIN_INPUT_PULLUP | SLEWCTRL_FAST | MUX_MODE2)
+			/* spi0_cs0.i2c1_scl, SLEWCTRL_SLOW | INPUT_PULLUP | MODE2 */
+			BONE_P9_17 (PIN_INPUT_PULLUP | SLEWCTRL_FAST | MUX_MODE2)
+		>;
+	};
+
+	mcasp0_pins: pinmux_mcasp0_pins {
+		pinctrl-single,pins = <
+			0x190 0x20      /* mcasp0_aclkx.mcasp0_aclkx, INPUT | MODE0 */
+			0x194 0x20      /* mcasp0_fsx.mcasp0_fsx, INPUT | MODE0 */
+			0x198 0x20      /* mcasp0_axr0.mcasp0_axr0, INPUT | MODE0 */
+			0x19c 0x22      /* mcasp0_ahclkr.mcasp0_axr2, INPUT | MODE2 */
+		>;
+	};
+};
+
+&epwmss1 {
+	status = "okay";
+};
+
+
+&ehrpwm1 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&bb_lcd_pwm_backlight_pins>;
+	status = "okay";
+};
+
+&i2c1 {
+	status = "okay";
+	pinctrl-names = "default";
+	pinctrl-0 = <&i2c1_pins>;
+	clock-frequency = <400000>;
+
+	edt-ft5306@38 {
+		status = "okay";
+		compatible = "edt,edt-ft5306", "edt,edt-ft5x06";
+		pinctrl-names = "default";
+		pinctrl-0 = <&edt_ft5306_ts_pins>;
+
+		reg = <0x38>;
+		interrupt-parent = <&gpio1>;
+		interrupts = <17 0>;
+
+		touchscreen-size-x = <1024>;
+		touchscreen-size-y = <600>;
+	};
+
+	tlv320aic3x: tlv320aic3x@1b {
+		compatible = "ti,tlv320aic3x";
+		reg = <0x1b>;
+		status = "okay";
+	};
+};
+
+&mcasp0 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&mcasp0_pins>;
+
+	status = "okay";
+
+	op-mode = <0>;          /* MCASP_IIS_MODE */
+	tdm-slots = <2>;
+	num-serializer = <16>;
+	serial-dir = <  /* 0: INACTIVE, 1: TX, 2: RX */
+		1 0 2 0
+		0 0 0 0
+		0 0 0 0
+		0 0 0 0
+	>;
+	tx-num-evt = <1>;
+	rx-num-evt = <1>;
+};
+
+/ {
+	backlight {
+		status = "okay";
+		compatible = "pwm-backlight";
+		pwms = <&ehrpwm1 0 50000 0>;
+		brightness-levels = <0 51 53 56 62 75 101 152 255>;
+		default-brightness-level = <8>;
+	};
+
+	gpio_keys {
+		compatible = "gpio-keys";
+		pinctrl-names = "default";
+		pinctrl-0 = <&keymap3_pins>;
+
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		button@1 {
+			debounce_interval = <50>;
+			linux,code = <105>;
+			label = "left";
+			gpios = <&gpio0 7 0x1>;
+			gpio-key,wakeup;
+			autorepeat;
+		};
+		button@2 {
+			debounce_interval = <50>;
+			linux,code = <106>;
+			label = "right";
+			gpios = <&gpio1 28 0x1>;
+			gpio-key,wakeup;
+			autorepeat;
+		};
+		button@3 {
+			debounce_interval = <50>;
+			linux,code = <103>;
+			label = "up";
+			gpios = <&gpio1 16 0x1>;
+			gpio-key,wakeup;
+			autorepeat;
+		};
+		button@4 {
+			debounce_interval = <50>;
+			linux,code = <108>;
+			label = "down";
+			gpios = <&gpio1 19 0x1>;
+			gpio-key,wakeup;
+			autorepeat;
+		};
+		button@5 {
+			debounce_interval = <50>;
+			linux,code = <28>;
+			label = "enter";
+			gpios = <&gpio3 19 0x1>;
+			gpio-key,wakeup;
+		};
+	};
+
+	gpio-leds-cape-lcd {
+		compatible = "gpio-leds";
+		pinctrl-names = "default";
+
+		pinctrl-0 = <&user_leds_s1>;
+
+		lcd-led0 {
+			label = "lcd:green:usr0";
+			gpios = <&gpio2 4 0>;
+			linux,default-trigger = "heartbeat";
+			default-state = "off";
+		};
+
+		lcd-led1 {
+			label = "lcd:green:usr1";
+			gpios = <&gpio2 5 0>;
+			linux,default-trigger = "mmc0";
+			default-state = "off";
+		};
+	};
+
+	sound {
+		compatible = "ti,da830-evm-audio";
+		ti,model = "DA830 EVM";
+		ti,audio-codec = <&tlv320aic3x>;
+		ti,mcasp-controller = <&mcasp0>;
+		ti,codec-clock-rate = <12000000>;
+		ti,audio-routing =
+			"Headphone Jack",       "HPLOUT",
+			"Headphone Jack",       "HPROUT",
+			"MIC3L",                "Mic Jack",
+			"MIC3R",                "Mic Jack";
+	};
+};
+
+#include "am335x-peripheral-panel-1024x600-24bit.dtsi"
+#include "am335x-bone-pinmux-panel-1024x600-24bit.dtsi"
diff --git b/arch/arm/boot/dts/am335x-cape-bbb-exp-r.dtsi b/arch/arm/boot/dts/am335x-cape-bbb-exp-r.dtsi
new file mode 100644
index 0000000..539409c
--- /dev/null
+++ b/arch/arm/boot/dts/am335x-cape-bbb-exp-r.dtsi
@@ -0,0 +1,217 @@
+/*
+ * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <dt-bindings/board/am335x-bbw-bbb-base.h>
+
+#include "am335x-peripheral-can0.dtsi"
+#include "am335x-bone-pinmux-can0.dtsi"
+
+#include "am335x-peripheral-ttyS1.dtsi"
+#include "am335x-bone-pinmux-ttyS1.dtsi"
+
+#include "am335x-peripheral-ttyS2.dtsi"
+#include "am335x-bone-pinmux-ttyS2.dtsi"
+
+#include "am335x-peripheral-ttyS4.dtsi"
+#include "am335x-bone-pinmux-ttyS4.dtsi"
+
+&am33xx_pinmux {
+	user_leds_s1: user_leds_s1 {
+		pinctrl-single,pins = <
+			0x98 0x7	/* gpmc_wen.gpio2_4, OUTPUT | MODE7 */
+			0x9c 0x7	/* gpmc_ben0_cle.gpio2_5, OUTPUT | MODE7 */
+		>;
+	};
+
+	bb_lcd_pwm_backlight_pins: pinmux_bb_lcd_pwm_backlight_pins {
+		pinctrl-single,pins = <
+			BONE_P9_14 (PIN_OUTPUT_PULLDOWN | MUX_MODE6) /* gpmc_a2.ehrpwm1a */
+		>;
+	};
+
+	keymap3_pins: pinmux_keymap3_pins {
+		pinctrl-single,pins = <
+			0x040 0x2f	/* KEY_UP gpmc_a0.gpio1_16, INPUT | PULLDIS | MODE7 */
+			0x04c 0x2f	/* KEY_DOWN gpmc_a3.gpio1_19, INPUT | PULLDIS | MODE7 */
+			0x078 0x2f	/* KEY_RIGHT gpmc_ben1.gpio1_28, INPUT | PULLDIS | MODE7 */
+			0x164 0x2f	/* KEY_LEFT ecap0_in_pwm0_out.gpio0_7, INPUT | PULLDIS | MODE7 */
+			0x1a4 0x2f	/* KEY_ENTER mcasp0_fxr.gpio3_19, INPUT | PULLDIS | MODE7 */
+		>;
+	};
+
+	i2c1_pins: pinmux_i2c1_pins {
+		pinctrl-single,pins = <
+			/* spi0_d1.i2c1_sda, SLEWCTRL_SLOW | INPUT_PULLUP | MODE2 */
+			BONE_P9_18 (PIN_INPUT_PULLUP | SLEWCTRL_FAST | MUX_MODE2)
+			/* spi0_cs0.i2c1_scl, SLEWCTRL_SLOW | INPUT_PULLUP | MODE2 */
+			BONE_P9_17 (PIN_INPUT_PULLUP | SLEWCTRL_FAST | MUX_MODE2)
+		>;
+	};
+
+	mcasp0_pins: pinmux_mcasp0_pins {
+		pinctrl-single,pins = <
+			0x190 0x20      /* mcasp0_aclkx.mcasp0_aclkx, INPUT | MODE0 */
+			0x194 0x20      /* mcasp0_fsx.mcasp0_fsx, INPUT | MODE0 */
+			0x198 0x20      /* mcasp0_axr0.mcasp0_axr0, INPUT | MODE0 */
+			0x19c 0x22      /* mcasp0_ahclkr.mcasp0_axr2, INPUT | MODE2 */
+		>;
+	};
+};
+
+&epwmss1 {
+	status = "okay";
+};
+
+
+&ehrpwm1 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&bb_lcd_pwm_backlight_pins>;
+	status = "okay";
+};
+
+&i2c1 {
+	status = "okay";
+	pinctrl-names = "default";
+	pinctrl-0 = <&i2c1_pins>;
+	clock-frequency = <400000>;
+
+	tlv320aic3x: tlv320aic3x@1b {
+		compatible = "ti,tlv320aic3x";
+		reg = <0x1b>;
+		status = "okay";
+	};
+};
+
+&mcasp0 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&mcasp0_pins>;
+
+	status = "okay";
+
+	op-mode = <0>;          /* MCASP_IIS_MODE */
+	tdm-slots = <2>;
+	num-serializer = <16>;
+	serial-dir = <  /* 0: INACTIVE, 1: TX, 2: RX */
+		1 0 2 0
+		0 0 0 0
+		0 0 0 0
+		0 0 0 0
+	>;
+	tx-num-evt = <1>;
+	rx-num-evt = <1>;
+};
+
+&tscadc {
+	status = "okay";
+	tsc {
+		ti,wires = <4>;
+		ti,x-plate-resistance = <200>;
+		ti,coordinate-readouts = <5>;
+		ti,wire-config = <0x00 0x11 0x22 0x33>;
+	};
+
+	adc {
+		ti,adc-channels = <4 5 6 7>;
+	};
+};
+
+/ {
+	backlight {
+		status = "okay";
+		compatible = "pwm-backlight";
+		pwms = <&ehrpwm1 0 50000 0>;
+		brightness-levels = <0 51 53 56 62 75 101 152 255>;
+		default-brightness-level = <8>;
+	};
+
+	gpio_keys {
+		compatible = "gpio-keys";
+		pinctrl-names = "default";
+		pinctrl-0 = <&keymap3_pins>;
+
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		button@1 {
+			debounce_interval = <50>;
+			linux,code = <105>;
+			label = "left";
+			gpios = <&gpio0 7 0x1>;
+			gpio-key,wakeup;
+			autorepeat;
+		};
+		button@2 {
+			debounce_interval = <50>;
+			linux,code = <106>;
+			label = "right";
+			gpios = <&gpio1 28 0x1>;
+			gpio-key,wakeup;
+			autorepeat;
+		};
+		button@3 {
+			debounce_interval = <50>;
+			linux,code = <103>;
+			label = "up";
+			gpios = <&gpio1 16 0x1>;
+			gpio-key,wakeup;
+			autorepeat;
+		};
+		button@4 {
+			debounce_interval = <50>;
+			linux,code = <108>;
+			label = "down";
+			gpios = <&gpio1 19 0x1>;
+			gpio-key,wakeup;
+			autorepeat;
+		};
+		button@5 {
+			debounce_interval = <50>;
+			linux,code = <28>;
+			label = "enter";
+			gpios = <&gpio3 19 0x1>;
+			gpio-key,wakeup;
+		};
+	};
+
+	gpio-leds-cape-lcd {
+		compatible = "gpio-leds";
+		pinctrl-names = "default";
+
+		pinctrl-0 = <&user_leds_s1>;
+
+		lcd-led0 {
+			label = "lcd:green:usr0";
+			gpios = <&gpio2 4 0>;
+			linux,default-trigger = "heartbeat";
+			default-state = "off";
+		};
+
+		lcd-led1 {
+			label = "lcd:green:usr1";
+			gpios = <&gpio2 5 0>;
+			linux,default-trigger = "mmc0";
+			default-state = "off";
+		};
+	};
+
+	sound {
+		compatible = "ti,da830-evm-audio";
+		ti,model = "DA830 EVM";
+		ti,audio-codec = <&tlv320aic3x>;
+		ti,mcasp-controller = <&mcasp0>;
+		ti,codec-clock-rate = <12000000>;
+		ti,audio-routing =
+			"Headphone Jack",       "HPLOUT",
+			"Headphone Jack",       "HPROUT",
+			"MIC3L",                "Mic Jack",
+			"MIC3R",                "Mic Jack";
+	};
+};
+
+#include "am335x-peripheral-panel-1024x600-24bit.dtsi"
+#include "am335x-bone-pinmux-panel-1024x600-24bit.dtsi"
diff --git b/arch/arm/boot/dts/am335x-cape-rtc-ds1307.dtsi b/arch/arm/boot/dts/am335x-cape-rtc-ds1307.dtsi
new file mode 100644
index 0000000..bce6ac5
--- /dev/null
+++ b/arch/arm/boot/dts/am335x-cape-rtc-ds1307.dtsi
@@ -0,0 +1,31 @@
+/*
+ * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <dt-bindings/board/am335x-bbw-bbb-base.h>
+
+&am33xx_pinmux {
+	i2c2_pins: pinmux_i2c2_pins {
+		pinctrl-single,pins = <
+			BONE_P9_20 0x73 /* (SLEWCTRL_SLOW | PIN_INPUT_PULLUP | MUX_MODE3) uart1_ctsn.i2c2_sda */
+			BONE_P9_19 0x73 /* (SLEWCTRL_SLOW | PIN_INPUT_PULLUP | MUX_MODE3) uart1_rtsn.i2c2_scl */
+		>;
+	};
+};
+
+&i2c2 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&i2c2_pins>;
+
+	status = "okay";
+	clock-frequency = <100000>;
+
+	rtc@68 {
+		compatible = "maxim,ds1307";
+		reg = <0x68>;
+	};
+};
diff --git b/arch/arm/boot/dts/am335x-olimex-som.dts b/arch/arm/boot/dts/am335x-olimex-som.dts
new file mode 100644
index 0000000..2b00ad2
--- /dev/null
+++ b/arch/arm/boot/dts/am335x-olimex-som.dts
@@ -0,0 +1,189 @@
+/*
+ * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+/dts-v1/;
+
+#include "am33xx.dtsi"
+#include "am335x-som-common.dtsi"
+
+/ {
+	model = "Olimex AM335x SOM";
+	compatible = "olimex,am335x-olimex-som", "ti,am33xx";
+};
+
+&ldo3_reg {
+	regulator-min-microvolt = <1800000>;
+	regulator-max-microvolt = <1800000>;
+	regulator-always-on;
+};
+
+&mmc1 {
+	vmmc-supply = <&vmmcsd_fixed>;
+};
+
+&am33xx_pinmux {
+	lcd_pins_default: lcd_pins_default {
+		pinctrl-single,pins = <
+		0x20 0x01 /* gpmc_ad8.lcd_data16, OUTPUT | MODE1 */
+		0x24 0x01 /* gpmc_ad9.lcd_data17, OUTPUT | MODE1 */
+		0x28 0x01 /* gpmc_ad10.lcd_data18, OUTPUT | MODE1 */
+		0x2c 0x01 /* gpmc_ad11.lcd_data19, OUTPUT | MODE1 */
+		0x30 0x01 /* gpmc_ad12.lcd_data20, OUTPUT | MODE1 */
+		0x34 0x01 /* gpmc_ad13.lcd_data21, OUTPUT | MODE1 */
+		0x38 0x01 /* gpmc_ad14.lcd_data22, OUTPUT | MODE1 */
+		0x3c 0x01 /* gpmc_ad15.lcd_data23, OUTPUT | MODE1 */
+		0xa0 0x00 /* lcd_data0.lcd_data0, OUTPUT | MODE0 */
+		0xa4 0x00 /* lcd_data1.lcd_data1, OUTPUT | MODE0 */
+		0xa8 0x00 /* lcd_data2.lcd_data2, OUTPUT | MODE0 */
+		0xac 0x00 /* lcd_data3.lcd_data3, OUTPUT | MODE0 */
+		0xb0 0x00 /* lcd_data4.lcd_data4, OUTPUT | MODE0 */
+		0xb4 0x00 /* lcd_data5.lcd_data5, OUTPUT | MODE0 */
+		0xb8 0x00 /* lcd_data6.lcd_data6, OUTPUT | MODE0 */
+		0xbc 0x00 /* lcd_data7.lcd_data7, OUTPUT | MODE0 */
+		0xc0 0x00 /* lcd_data8.lcd_data8, OUTPUT | MODE0 */
+		0xc4 0x00 /* lcd_data9.lcd_data9, OUTPUT | MODE0 */
+		0xc8 0x00 /* lcd_data10.lcd_data10, OUTPUT | MODE0 */
+		0xcc 0x00 /* lcd_data11.lcd_data11, OUTPUT | MODE0 */
+		0xd0 0x00 /* lcd_data12.lcd_data12, OUTPUT | MODE0 */
+		0xd4 0x00 /* lcd_data13.lcd_data13, OUTPUT | MODE0 */
+		0xd8 0x00 /* lcd_data14.lcd_data14, OUTPUT | MODE0 */
+		0xdc 0x00 /* lcd_data15.lcd_data15, OUTPUT | MODE0 */
+		0xe0 0x00 /* lcd_vsync.lcd_vsync, OUTPUT | MODE0 */
+		0xe4 0x00 /* lcd_hsync.lcd_hsync, OUTPUT | MODE0 */
+		0xe8 0x00 /* lcd_pclk.lcd_pclk, OUTPUT | MODE0 */
+		0xec 0x00 /* lcd_ac_bias_en.lcd_ac_bias_en, OUTPUT | MODE0 */
+		>;
+	};
+
+	lcd_pins_sleep: lcd_pins_sleep {
+		pinctrl-single,pins = <
+		0x20 (PIN_INPUT_PULLDOWN | MUX_MODE7) /* gpmc_ad8.lcd_data16 */
+		0x24 (PIN_INPUT_PULLDOWN | MUX_MODE7) /* gpmc_ad9.lcd_data17 */
+		0x28 (PIN_INPUT_PULLDOWN | MUX_MODE7) /* gpmc_ad10.lcd_data18 */
+		0x2c (PIN_INPUT_PULLDOWN | MUX_MODE7) /* gpmc_ad11.lcd_data19 */
+		0x30 (PIN_INPUT_PULLDOWN | MUX_MODE7) /* gpmc_ad12.lcd_data20 */
+		0x34 (PIN_INPUT_PULLDOWN | MUX_MODE7) /* gpmc_ad13.lcd_data21 */
+		0x38 (PIN_INPUT_PULLDOWN | MUX_MODE7) /* gpmc_ad14.lcd_data22 */
+		0x3c (PIN_INPUT_PULLDOWN | MUX_MODE7) /* gpmc_ad15.lcd_data23 */
+		0xa0 (PULL_DISABLE | MUX_MODE7) /* lcd_data0.lcd_data0 */
+		0xa4 (PULL_DISABLE | MUX_MODE7) /* lcd_data1.lcd_data1 */
+		0xa8 (PULL_DISABLE | MUX_MODE7) /* lcd_data2.lcd_data2 */
+		0xac (PULL_DISABLE | MUX_MODE7) /* lcd_data3.lcd_data3 */
+		0xb0 (PULL_DISABLE | MUX_MODE7) /* lcd_data4.lcd_data4 */
+		0xb4 (PULL_DISABLE | MUX_MODE7) /* lcd_data5.lcd_data5 */
+		0xb8 (PULL_DISABLE | MUX_MODE7) /* lcd_data6.lcd_data6 */
+		0xbc (PULL_DISABLE | MUX_MODE7) /* lcd_data7.lcd_data7 */
+		0xc0 (PULL_DISABLE | MUX_MODE7) /* lcd_data8.lcd_data8 */
+		0xc4 (PULL_DISABLE | MUX_MODE7) /* lcd_data9.lcd_data9 */
+		0xc8 (PULL_DISABLE | MUX_MODE7) /* lcd_data10.lcd_data10 */
+		0xcc (PULL_DISABLE | MUX_MODE7) /* lcd_data11.lcd_data11 */
+		0xd0 (PULL_DISABLE | MUX_MODE7) /* lcd_data12.lcd_data12 */
+		0xd4 (PULL_DISABLE | MUX_MODE7) /* lcd_data13.lcd_data13 */
+		0xd8 (PULL_DISABLE | MUX_MODE7) /* lcd_data14.lcd_data14 */
+		0xdc (PULL_DISABLE | MUX_MODE7) /* lcd_data15.lcd_data15 */
+		/* lcd_vsync.lcd_vsync,OUTPUT | MODE0 */
+		0xe0 (PIN_INPUT_PULLDOWN | MUX_MODE7)
+		0xe4 (PIN_INPUT_PULLDOWN | MUX_MODE7) /* lcd_hsync.lcd_hsync */
+		0xe8 (PIN_INPUT_PULLDOWN | MUX_MODE7) /* lcd_pclk.lcd_pclk */
+		/* lcd_ac_bias_en.lcd_ac_bias_en */
+		0xec (PIN_INPUT_PULLDOWN | MUX_MODE7)
+		>;
+	};
+
+};
+
+&lcdc {
+	pinctrl-names = "default", "sleep";
+	pinctrl-0 = <&lcd_pins_default>;
+	pinctrl-1 = <&lcd_pins_sleep>;
+	status = "okay";
+	/*        display-timings {
+	 480x272 {
+	 hactive         = <480>;
+	 vactive         = <272>;
+	 hback-porch     = <43>;
+	 hfront-porch    = <8>;
+	 hsync-len       = <4>;
+	 vback-porch     = <12>;
+	 vfront-porch    = <4>;
+	 vsync-len       = <10>;
+	 clock-frequency = <9000000>;
+	 hsync-active    = <0>;
+	 vsync-active    = <0>;
+	 };
+	 };*/
+
+	display-timings {
+		native-mode = <&vga1024x768>;
+		lcd4: 480x272 {
+			clock-frequency = <9000000>;
+			hactive = <480>;
+			vactive = <272>;
+			hfront-porch = <3>;
+			hback-porch = <40>;
+			vback-porch = <8>;
+			vfront-porch = <7>;
+			hsync-len = <2>;
+			vsync-len = <1>;
+			hsync-active = <0>;
+			vsync-active = <0>;
+		};
+		lcd7: 800x480 {
+			clock-frequency = <33300000>;
+			hactive = <800>;
+			vactive = <480>;
+			hfront-porch = <210>;
+			hback-porch = <40>;
+			vback-porch = <23>;
+			vfront-porch = <20>;
+			hsync-len = <6>;
+			vsync-len = <2>;
+			hsync-active = <0>;
+			vsync-active = <0>;
+		};
+		lcd10: 1024x600 {
+			clock-frequency = <51200000>;
+			hactive = <1024>;
+			vactive = <600>;
+			hfront-porch = <160>;
+			hback-porch = <140>;
+			vback-porch = <20>;
+			vfront-porch = <12>;
+			hsync-len = <20>;
+			vsync-len = <3>;
+			hsync-active = <0>;
+			vsync-active = <0>;
+		};
+
+		vga800x600: 800x600 {
+			clock-frequency = <40000000>;
+			hactive = <800>;
+			vactive = <600>;
+			hfront-porch = <40>;
+			hback-porch = <88>;
+			vfront-porch = <1>;
+			vback-porch = <23>;
+			hsync-len = <128>;
+			vsync-len = <4>;
+			hsync-active = <0>;
+			vsync-active = <0>;
+		};
+		vga1024x768: 1024x768 {
+			clock-frequency = <65000000>;
+			hactive = <1024>;
+			hfront-porch = <24>;
+			hback-porch = <160>;
+			hsync-len = <136>;
+			vactive = <768>;
+			vfront-porch = <3>;
+			vback-porch = <29>;
+			vsync-len = <6>;
+			hsync-active = <0>;
+			vsync-active = <0>;
+		};
+	};
+};
diff --git b/arch/arm/boot/dts/am335x-peripheral-can0.dtsi b/arch/arm/boot/dts/am335x-peripheral-can0.dtsi
new file mode 100644
index 0000000..4335e39
--- /dev/null
+++ b/arch/arm/boot/dts/am335x-peripheral-can0.dtsi
@@ -0,0 +1,13 @@
+/*
+ * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+&dcan0 {
+	pinctrl-names = "default";
+
+	status = "okay";
+};
diff --git b/arch/arm/boot/dts/am335x-peripheral-can1.dtsi b/arch/arm/boot/dts/am335x-peripheral-can1.dtsi
new file mode 100644
index 0000000..02b5bd1
--- /dev/null
+++ b/arch/arm/boot/dts/am335x-peripheral-can1.dtsi
@@ -0,0 +1,13 @@
+/*
+ * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+&dcan1 {
+	pinctrl-names = "default";
+
+	status = "okay";
+};
diff --git b/arch/arm/boot/dts/am335x-peripheral-emmc.dtsi b/arch/arm/boot/dts/am335x-peripheral-emmc.dtsi
new file mode 100644
index 0000000..603f34e
--- /dev/null
+++ b/arch/arm/boot/dts/am335x-peripheral-emmc.dtsi
@@ -0,0 +1,15 @@
+/*
+ * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+&mmc2 {
+	vmmc-supply = <&vmmcsd_fixed>;
+	pinctrl-names = "default";
+
+	bus-width = <8>;
+	status = "okay";
+};
diff --git b/arch/arm/boot/dts/am335x-peripheral-i2c2.dtsi b/arch/arm/boot/dts/am335x-peripheral-i2c2.dtsi
new file mode 100644
index 0000000..ed9a0b5
--- /dev/null
+++ b/arch/arm/boot/dts/am335x-peripheral-i2c2.dtsi
@@ -0,0 +1,13 @@
+/*
+ * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+&i2c2 {
+	pinctrl-names = "default";
+
+	status = "okay";
+};
diff --git b/arch/arm/boot/dts/am335x-peripheral-nxp-hdmi.dtsi b/arch/arm/boot/dts/am335x-peripheral-nxp-hdmi.dtsi
new file mode 100644
index 0000000..1dfd26a
--- /dev/null
+++ b/arch/arm/boot/dts/am335x-peripheral-nxp-hdmi.dtsi
@@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+&lcdc {
+	status = "okay";
+	port {
+		lcdc_0: endpoint@0 {
+			remote-endpoint = <&hdmi_0>;
+		};
+	};
+};
+
+&i2c0 {
+	tda19988 {
+		compatible = "nxp,tda998x";
+		reg = <0x70>;
+
+		port {
+			hdmi_0: endpoint@0 {
+				remote-endpoint = <&lcdc_0>;
+			};
+		};
+	};
+};
diff --git b/arch/arm/boot/dts/am335x-peripheral-panel-1024x600-24bit.dtsi b/arch/arm/boot/dts/am335x-peripheral-panel-1024x600-24bit.dtsi
new file mode 100644
index 0000000..74ddc12
--- /dev/null
+++ b/arch/arm/boot/dts/am335x-peripheral-panel-1024x600-24bit.dtsi
@@ -0,0 +1,49 @@
+/*
+ * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+&lcdc {
+	status = "okay";
+};
+
+/ {
+	panel {
+		status = "okay";
+		compatible = "ti,tilcdc,panel";
+		pinctrl-names = "default";
+
+		panel-info {
+			ac-bias           = <255>;
+			ac-bias-intrpt    = <0>;
+			dma-burst-sz      = <16>;
+			bpp               = <32>;
+			fdd               = <0x80>;
+			sync-edge         = <0>;
+			sync-ctrl         = <0>;
+			raster-order      = <1>;
+			fifo-th           = <0>;
+		};
+		display-timings {
+			native-mode = <&timing0>;
+			timing0: 1024x600 {
+				clock-frequency = <36000000>;
+				hactive = <1024>;
+				vactive = <600>;
+				hfront-porch = <1>;
+				hback-porch = <45>;
+				hsync-len = <30>;
+				vback-porch = <22>;
+				vfront-porch = <12>;
+				vsync-len = <2>;
+				hsync-active = <1>;
+				vsync-active = <1>;
+				de-active = <1>;
+				pixelclk-active = <0>;
+			};
+		};
+	};
+};
diff --git b/arch/arm/boot/dts/am335x-peripheral-spi0.dtsi b/arch/arm/boot/dts/am335x-peripheral-spi0.dtsi
new file mode 100644
index 0000000..969e352
--- /dev/null
+++ b/arch/arm/boot/dts/am335x-peripheral-spi0.dtsi
@@ -0,0 +1,13 @@
+/*
+ * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+&spi0 {
+	pinctrl-names = "default";
+
+	status = "okay";
+};
diff --git b/arch/arm/boot/dts/am335x-peripheral-spi1.dtsi b/arch/arm/boot/dts/am335x-peripheral-spi1.dtsi
new file mode 100644
index 0000000..ac5fe97
--- /dev/null
+++ b/arch/arm/boot/dts/am335x-peripheral-spi1.dtsi
@@ -0,0 +1,13 @@
+/*
+ * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+&spi1 {
+	pinctrl-names = "default";
+
+	status = "okay";
+};
diff --git b/arch/arm/boot/dts/am335x-peripheral-spi1a.dtsi b/arch/arm/boot/dts/am335x-peripheral-spi1a.dtsi
new file mode 100644
index 0000000..ac5fe97
--- /dev/null
+++ b/arch/arm/boot/dts/am335x-peripheral-spi1a.dtsi
@@ -0,0 +1,13 @@
+/*
+ * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+&spi1 {
+	pinctrl-names = "default";
+
+	status = "okay";
+};
diff --git b/arch/arm/boot/dts/am335x-peripheral-ttyS1.dtsi b/arch/arm/boot/dts/am335x-peripheral-ttyS1.dtsi
new file mode 100644
index 0000000..f59fa4c
--- /dev/null
+++ b/arch/arm/boot/dts/am335x-peripheral-ttyS1.dtsi
@@ -0,0 +1,13 @@
+/*
+ * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+&uart1 {
+	pinctrl-names = "default";
+
+	status = "okay";
+};
diff --git b/arch/arm/boot/dts/am335x-peripheral-ttyS2.dtsi b/arch/arm/boot/dts/am335x-peripheral-ttyS2.dtsi
new file mode 100644
index 0000000..a25d6cf
--- /dev/null
+++ b/arch/arm/boot/dts/am335x-peripheral-ttyS2.dtsi
@@ -0,0 +1,13 @@
+/*
+ * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+&uart2 {
+	pinctrl-names = "default";
+
+	status = "okay";
+};
diff --git b/arch/arm/boot/dts/am335x-peripheral-ttyS4.dtsi b/arch/arm/boot/dts/am335x-peripheral-ttyS4.dtsi
new file mode 100644
index 0000000..adc89f0
--- /dev/null
+++ b/arch/arm/boot/dts/am335x-peripheral-ttyS4.dtsi
@@ -0,0 +1,13 @@
+/*
+ * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+&uart4 {
+	pinctrl-names = "default";
+
+	status = "okay";
+};
diff --git b/arch/arm/boot/dts/am335x-peripheral-ttyS5.dtsi b/arch/arm/boot/dts/am335x-peripheral-ttyS5.dtsi
new file mode 100644
index 0000000..8b42fb0
--- /dev/null
+++ b/arch/arm/boot/dts/am335x-peripheral-ttyS5.dtsi
@@ -0,0 +1,13 @@
+/*
+ * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+&uart5 {
+	pinctrl-names = "default";
+
+	status = "okay";
+};
diff --git b/arch/arm/boot/dts/am335x-roboticscape.dtsi b/arch/arm/boot/dts/am335x-roboticscape.dtsi
new file mode 100644
index 0000000..77d815a
--- /dev/null
+++ b/arch/arm/boot/dts/am335x-roboticscape.dtsi
@@ -0,0 +1,395 @@
+/*******************************************************************************
+* pinmux and modules used by roboticscape
+* included in:
+* am335x-boneblack-roboticscape.dts
+* am335x-boneblack-wireless-roboticscape.dts
+*******************************************************************************/
+
+
+
+/*******************************************************************************
+* Pin Muxing
+*******************************************************************************/
+&am33xx_pinmux {
+
+	/***************************************************************************
+	* Static Pinmux
+	***************************************************************************/
+	mux_helper_pins: pins {
+		pinctrl-single,pins = <
+
+			/* GPIO Input Pullup */
+			0x09c 0x37	/*P8.9  T6  PAUSE_BTN */
+			0x098 0x37	/*P8.10 U6  MODE_BTN  */
+			0x1AC 0x37	/*P9.25 A14 IMU_INT   */
+
+			/* LEDs GPIO Out*/
+			0x090 0x0F	/* P8.7 R7 LED_RED */
+			0x094 0x0F	/* P8.8 T7 LED_GREEN */
+			0x028 0x0F	/*P8.14 T11 BATT_LED_4 */
+			0x02C 0x0F	/*P8.17 U12 BATT_LED_1 */
+			0x08c 0x0F	/*P8.18 V12 BATT_LED_2 */
+			0x07c 0x0F	/*P8.26 V6  BATT_LED_3 */
+
+			/* Motor Control GPIO Out*/
+			0x0cc 0x0F	/*P8.34 MDIR_2B different from blue!!*/
+			0x0a8 0x0F	/*P8.43 MDIR_3B*/
+			0x0ac 0x0F	/*P8.44 MDIR_3A*/
+			0x0a0 0x0F	/*P8.45 MDIR_4A*/
+			0x0a4 0x0F	/*P8.46 MDIR_4B*/
+			0x078 0x0F	/*P9.12 MDIR_1A different from blue!!*/
+			0x074 0x0F	/*P9.13 MDIR_1B*/
+			0x040 0x0F	/*P9.15 MDIR_2A*/
+			0x1b4 0x0F	/*P9.41 MOT_STBY*/
+
+			/* HRPWM 1 */
+			0x048  0x6 /* P9_14 | MODE 6 */
+			0x04c  0x6 /* P9_16 | MODE 6 */
+
+			/* HRPWM 2 */
+			0x020  0x4 /* P8_19 | MODE 4 */
+			0x024  0x4 /* P8_13 | MODE 4 */
+
+			/* EQEP */
+			0x1A0 0x31  /* P9_42,EQEP0A, MODE1 */
+			0x1A4 0x31  /* P9_27,EQEP0B, MODE1 */
+			0x0D4 0x32  /* P8_33,EQEP1B, MODE2 */
+			0x0D0 0x32  /* P8_35,EQEP1A, MODE2 */
+			0x030 0x34  /* P8_12,EQEP2A, MODE4 */
+			0x034 0x34  /* P8_11,EQEP2B, MODE4 */
+
+			/* PRU encoder input */
+			0x03c 0x36	/* P8_15,PRU0_r31_15,MODE6 */
+			0x038 0x36	/* P8_16,PRU0_r31_16,MODE6 */
+
+			/* PRU Servo output */
+			0x0e0 0x05	/*pru1_pru_r30_8, MODE5*/
+			0x0e8 0x05	/*pru1_pru_r30_10, MODE5 */
+			0x0e4 0x05	/*pr1_pru1_pru_r30_9, MODE5 */
+			0x0ec 0x05	/*pru1_pru_r30_11, MODE5 */
+			0x0b8 0x05	/*pru1_pru_r30_6, MODE5 */
+			0x0bc 0x05	/*pru1_pru_r30_7, MODE5 */
+			0x0b0 0x05	/*pru1_pru_r30_4, MODE5 */
+			0x0b4 0x05	/*pru1_pru_r30_5, MODE5 */
+			0x0C8 0x0F	/*P8.36, SERVO_PWR GPIO OUT*/
+
+			/* I2C1 */
+			0x15C 0x32	/* P9.17,i2c1_scl,INPUT_PULLUP,MODE2 */
+			0x158 0x32	/* P9.18,i2c1_sda,INPUT_PULLUP,MODE2 */
+
+			/* I2C2 */
+			0x17c  0x73 /* P9.19, i2c2_sda, mode 3 */
+			0x178  0x73 /* P9.20, i2c2_sda, mode 3 */
+
+			/* UART5 */
+			0x0C4 0x34	/* P8.38,uart5_rxd,MODE4 */
+			0x0C0 0x14	/* P8.37,uart5_txd,MODE4 */
+
+		>;
+	};
+
+};
+
+
+/*******************************************************************************
+* apply static and dynamic pinmux modes listed above. Configurable pins get the
+* modes from am335x-boneblack-common-universal-pins.dtsi
+*******************************************************************************/
+&ocp {
+	/* activate the static pinmux helper list of pin modes above */
+	test_helper: helper {
+		compatible = "bone-pinmux-helper";
+		pinctrl-names = "default";
+		pinctrl-0 = <&mux_helper_pins>;
+
+		status = "okay";
+	};
+
+	/* UART4 RX DSM */
+	P9_11_pinmux {
+		compatible = "bone-pinmux-helper";
+		status = "okay";
+		pinctrl-names = "default", "gpio", "gpio_pu", "gpio_pd", "uart";
+		pinctrl-0 = <&P9_11_default_pin>;
+		pinctrl-1 = <&P9_11_gpio_pin>;
+		pinctrl-2 = <&P9_11_gpio_pu_pin>;
+		pinctrl-3 = <&P9_11_gpio_pd_pin>;
+		pinctrl-4 = <&P9_11_uart_pin>;
+	};
+
+	/* UART 2 TX GPS*/
+	P9_21_pinmux {
+        compatible = "bone-pinmux-helper";
+        status = "okay";
+        pinctrl-names = "default", "gpio", "gpio_pu", "gpio_pd", "spi", "uart", "i2c", "pwm";
+        pinctrl-0 = <&P9_21_default_pin>;
+        pinctrl-1 = <&P9_21_gpio_pin>;
+        pinctrl-2 = <&P9_21_gpio_pu_pin>;
+        pinctrl-3 = <&P9_21_gpio_pd_pin>;
+        pinctrl-4 = <&P9_21_spi_pin>;
+        pinctrl-5 = <&P9_21_uart_pin>;
+        pinctrl-6 = <&P9_21_i2c_pin>;
+        pinctrl-7 = <&P9_21_pwm_pin>;
+    };
+
+    /* UART 2 RX GPS */
+    P9_22_pinmux {
+        compatible = "bone-pinmux-helper";
+        status = "okay";
+        pinctrl-names = "default", "gpio", "gpio_pu", "gpio_pd", "spi", "uart", "i2c", "pwm";
+        pinctrl-0 = <&P9_22_default_pin>;
+        pinctrl-1 = <&P9_22_gpio_pin>;
+        pinctrl-2 = <&P9_22_gpio_pu_pin>;
+        pinctrl-3 = <&P9_22_gpio_pd_pin>;
+        pinctrl-4 = <&P9_22_spi_pin>;
+        pinctrl-5 = <&P9_22_uart_pin>;
+        pinctrl-6 = <&P9_22_i2c_pin>;
+        pinctrl-7 = <&P9_22_pwm_pin>;
+    };
+
+    /* SPI MISO */
+    P9_29_pinmux {
+        compatible = "bone-pinmux-helper";
+        status = "okay";
+        pinctrl-names = "default", "gpio", "gpio_pu", "gpio_pd", "pwm", "spi", "pruout", "pruin";
+        pinctrl-0 = <&P9_29_default_pin>;
+        pinctrl-1 = <&P9_29_gpio_pin>;
+        pinctrl-2 = <&P9_29_gpio_pu_pin>;
+        pinctrl-3 = <&P9_29_gpio_pd_pin>;
+        pinctrl-4 = <&P9_29_pwm_pin>;
+        pinctrl-5 = <&P9_29_spi_pin>;
+        pinctrl-6 = <&P9_29_pruout_pin>;
+        pinctrl-7 = <&P9_29_pruin_pin>;
+    };
+
+    /* SPI MOSI */
+    P9_30_pinmux {
+        compatible = "bone-pinmux-helper";
+        status = "okay";
+        pinctrl-names = "default", "gpio", "gpio_pu", "gpio_pd", "pwm", "spi", "pruout", "pruin";
+        pinctrl-0 = <&P9_30_default_pin>;
+        pinctrl-1 = <&P9_30_gpio_pin>;
+        pinctrl-2 = <&P9_30_gpio_pu_pin>;
+        pinctrl-3 = <&P9_30_gpio_pd_pin>;
+        pinctrl-4 = <&P9_30_pwm_pin>;
+        pinctrl-5 = <&P9_30_spi_pin>;
+        pinctrl-6 = <&P9_30_pruout_pin>;
+        pinctrl-7 = <&P9_30_pruin_pin>;
+    };
+
+    /* SPI SCK */
+    P9_31_pinmux {
+        compatible = "bone-pinmux-helper";
+        status = "okay";
+        pinctrl-names = "default", "gpio", "gpio_pu", "gpio_pd", "pwm", "spi", "pruout", "pruin";
+        pinctrl-0 = <&P9_31_default_pin>;
+        pinctrl-1 = <&P9_31_gpio_pin>;
+        pinctrl-2 = <&P9_31_gpio_pu_pin>;
+        pinctrl-3 = <&P9_31_gpio_pd_pin>;
+        pinctrl-4 = <&P9_31_pwm_pin>;
+        pinctrl-5 = <&P9_31_spi_pin>;
+        pinctrl-6 = <&P9_31_pruout_pin>;
+        pinctrl-7 = <&P9_31_pruin_pin>;
+    };
+
+    /* SPI SS1 GPIO3_17*/
+	P9_28_pinmux {
+        compatible = "bone-pinmux-helper";
+        status = "okay";
+        pinctrl-names = "default", "gpio", "gpio_pu", "gpio_pd", "pwm", "spi", "pwm2", "pruout", "pruin";
+        pinctrl-0 = <&P9_28_default_pin>;
+        pinctrl-1 = <&P9_28_gpio_pin>;
+        pinctrl-2 = <&P9_28_gpio_pu_pin>;
+        pinctrl-3 = <&P9_28_gpio_pd_pin>;
+        pinctrl-4 = <&P9_28_pwm_pin>;
+        pinctrl-5 = <&P9_28_spi_pin>;
+        pinctrl-6 = <&P9_28_pwm2_pin>;
+        pinctrl-7 = <&P9_28_pruout_pin>;
+        pinctrl-8 = <&P9_28_pruin_pin>;
+    };
+
+    /* SPI SS1  GPIO1_17*/
+    P9_23_pinmux {
+        compatible = "bone-pinmux-helper";
+        status = "okay";
+        pinctrl-names = "default", "gpio", "gpio_pu", "gpio_pd", "pwm";
+        pinctrl-0 = <&P9_23_default_pin>;
+        pinctrl-1 = <&P9_23_gpio_pin>;
+        pinctrl-2 = <&P9_23_gpio_pu_pin>;
+        pinctrl-3 = <&P9_23_gpio_pd_pin>;
+        pinctrl-4 = <&P9_23_pwm_pin>;
+    };
+
+    /* UART 1 TX */
+     P9_24_pinmux {
+        compatible = "bone-pinmux-helper";
+        status = "okay";
+        pinctrl-names = "default", "gpio", "gpio_pu", "gpio_pd", "uart", "can", "i2c",  "pruin";
+        pinctrl-0 = <&P9_24_default_pin>;
+        pinctrl-1 = <&P9_24_gpio_pin>;
+        pinctrl-2 = <&P9_24_gpio_pu_pin>;
+        pinctrl-3 = <&P9_24_gpio_pd_pin>;
+        pinctrl-4 = <&P9_24_uart_pin>;
+        pinctrl-5 = <&P9_24_can_pin>;
+        pinctrl-6 = <&P9_24_i2c_pin>;
+        pinctrl-7 = <&P9_24_pruin_pin>;
+    };
+
+    /* UART 1 RX */
+    P9_26_pinmux {
+        compatible = "bone-pinmux-helper";
+        status = "okay";
+        pinctrl-names = "default", "gpio", "gpio_pu", "gpio_pd", "uart", "can", "i2c",  "pruin";
+        pinctrl-0 = <&P9_26_default_pin>;
+        pinctrl-1 = <&P9_26_gpio_pin>;
+        pinctrl-2 = <&P9_26_gpio_pu_pin>;
+        pinctrl-3 = <&P9_26_gpio_pd_pin>;
+        pinctrl-4 = <&P9_26_uart_pin>;
+        pinctrl-5 = <&P9_26_can_pin>;
+        pinctrl-6 = <&P9_26_i2c_pin>;
+        pinctrl-7 = <&P9_26_pruin_pin>;
+    };
+
+
+};
+
+
+/*******************************************************************************
+* PWMSS
+*******************************************************************************/
+&epwmss0 {
+	status = "okay";
+};
+
+&epwmss1 {
+	status = "okay";
+};
+
+&epwmss2 {
+	status = "okay";
+};
+
+&ehrpwm0 {
+	status = "okay";
+};
+
+&ehrpwm1 {
+	status = "okay";
+};
+
+&ehrpwm2 {
+	status = "okay";
+};
+
+/*******************************************************************************
+* EQEP
+*******************************************************************************/
+&eqep0 {
+	count_mode = <0>;  /* 0 - Quadrature mode, normal 90 phase offset cha & chb.  1 - Direction mode.  cha input = clock, chb input = direction */
+	swap_inputs = <0>; /* Are channel A and channel B swapped? (0 - no, 1 - yes) */
+	invert_qa = <1>;   /* Should we invert the channel A input?  */
+	invert_qb = <1>;   /* Should we invert the channel B input? I invert these because my encoder outputs drive transistors that pull down the pins */
+	invert_qi = <0>;   /* Should we invert the index input? */
+	invert_qs = <0>;   /* Should we invert the strobe input? */
+
+	status = "okay";
+};
+
+&eqep1 {
+	count_mode = <0>;  /* 0 - Quadrature mode, normal 90 phase offset cha & chb.  1 - Direction mode.  cha input = clock, chb input = direction */
+	swap_inputs = <0>; /* Are channel A and channel B swapped? (0 - no, 1 - yes) */
+	invert_qa = <1>;   /* Should we invert the channel A input?  */
+	invert_qb = <1>;   /* Should we invert the channel B input? I invert these because my encoder outputs drive transistors that pull down the pins */
+	invert_qi = <0>;   /* Should we invert the index input? */
+	invert_qs = <0>;   /* Should we invert the strobe input? */
+
+	status = "okay";
+};
+
+&eqep2 {
+	count_mode = <0>;  /* 0 - Quadrature mode, normal 90 phase offset cha & chb.  1 - Direction mode.  cha input = clock, chb input = direction */
+	swap_inputs = <0>; /* Are channel A and channel B swapped? (0 - no, 1 - yes) */
+	invert_qa = <1>;   /* Should we invert the channel A input?  */
+	invert_qb = <1>;   /* Should we invert the channel B input? I invert these because my encoder outputs drive transistors that pull down the pins */
+	invert_qi = <0>;   /* Should we invert the index input? */
+	invert_qs = <0>;   /* Should we invert the strobe input? */
+
+	status = "okay";
+};
+
+
+/*******************************************************************************
+* UART
+*******************************************************************************/
+&uart1 {
+	status = "okay";
+};
+
+&uart2 {
+	status = "okay";
+};
+
+&uart4 {
+	status = "okay";
+};
+
+&uart5 {
+	status = "okay";
+};
+
+
+/*******************************************************************************
+* PRU Encoder and Servos
+*******************************************************************************/
+&pruss {
+	status = "okay";
+};
+
+
+/*******************************************************************************
+* I2C
+*******************************************************************************/
+&i2c1 {
+	status = "okay";
+	clock-frequency = <400000>;
+	#address-cells = <1>;
+	#size-cells = <0>;
+};
+
+&i2c2 {
+	status = "okay";
+	clock-frequency = <400000>;
+	#address-cells = <1>;
+	#size-cells = <0>;
+};
+
+
+/*******************************************************************************
+* SPI
+*******************************************************************************/
+&spi1 {
+	#address-cells = <1>;
+	#size-cells = <0>;
+	status = "okay";
+
+	channel@0 {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		compatible = "spidev";
+
+		reg = <0>;
+		spi-max-frequency = <16000000>;
+		spi-cpha;
+	};
+
+	channel@1 {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		compatible = "spidev";
+
+		reg = <1>;
+		spi-max-frequency = <16000000>;
+	};
+};
diff --git b/arch/arm/boot/dts/am335x-sancloud-bbe.dts b/arch/arm/boot/dts/am335x-sancloud-bbe.dts
new file mode 100644
index 0000000..a36b04b
--- /dev/null
+++ b/arch/arm/boot/dts/am335x-sancloud-bbe.dts
@@ -0,0 +1,264 @@
+/*
+ * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+/dts-v1/;
+
+#include "am33xx.dtsi"
+#include "am335x-bone-common.dtsi"
+#include <dt-bindings/interrupt-controller/irq.h>
+#include <dt-bindings/display/tda998x.h>
+/* #include "am335x-bone-jtag.dtsi" */
+
+/ {
+	model = "SanCloud BeagleBone Enhanced";
+	compatible = "sancloud,am335x-boneenhanced", "ti,am335x-bone-black", "ti,am335x-bone", "ti,am33xx";
+};
+
+&ldo3_reg {
+	regulator-min-microvolt = <1800000>;
+	regulator-max-microvolt = <1800000>;
+	regulator-always-on;
+};
+
+&mmc1 {
+	vmmc-supply = <&vmmcsd_fixed>;
+};
+
+&mmc2 {
+	vmmc-supply = <&vmmcsd_fixed>;
+	pinctrl-names = "default";
+	pinctrl-0 = <&emmc_pins>;
+	bus-width = <8>;
+	status = "okay";
+	ti,vcc-aux-disable-is-sleep;
+};
+
+&am33xx_pinmux {
+	pinctrl-names = "default";
+	pinctrl-0 = <&usb_hub_ctrl>;
+
+	nxp_hdmi_bonelt_pins: nxp_hdmi_bonelt_pins {
+		pinctrl-single,pins = <
+			AM33XX_IOPAD(0x9b0, PIN_OUTPUT_PULLDOWN | MUX_MODE3)	/* xdma_event_intr0 */
+			AM33XX_IOPAD(0x8a0, PIN_OUTPUT | MUX_MODE0)		/* lcd_data0.lcd_data0 */
+			AM33XX_IOPAD(0x8a4, PIN_OUTPUT | MUX_MODE0)		/* lcd_data1.lcd_data1 */
+			AM33XX_IOPAD(0x8a8, PIN_OUTPUT | MUX_MODE0)		/* lcd_data2.lcd_data2 */
+			AM33XX_IOPAD(0x8ac, PIN_OUTPUT | MUX_MODE0)		/* lcd_data3.lcd_data3 */
+			AM33XX_IOPAD(0x8b0, PIN_OUTPUT | MUX_MODE0)		/* lcd_data4.lcd_data4 */
+			AM33XX_IOPAD(0x8b4, PIN_OUTPUT | MUX_MODE0)		/* lcd_data5.lcd_data5 */
+			AM33XX_IOPAD(0x8b8, PIN_OUTPUT | MUX_MODE0)		/* lcd_data6.lcd_data6 */
+			AM33XX_IOPAD(0x8bc, PIN_OUTPUT | MUX_MODE0)		/* lcd_data7.lcd_data7 */
+			AM33XX_IOPAD(0x8c0, PIN_OUTPUT | MUX_MODE0)		/* lcd_data8.lcd_data8 */
+			AM33XX_IOPAD(0x8c4, PIN_OUTPUT | MUX_MODE0)		/* lcd_data9.lcd_data9 */
+			AM33XX_IOPAD(0x8c8, PIN_OUTPUT | MUX_MODE0)		/* lcd_data10.lcd_data10 */
+			AM33XX_IOPAD(0x8cc, PIN_OUTPUT | MUX_MODE0)		/* lcd_data11.lcd_data11 */
+			AM33XX_IOPAD(0x8d0, PIN_OUTPUT | MUX_MODE0)		/* lcd_data12.lcd_data12 */
+			AM33XX_IOPAD(0x8d4, PIN_OUTPUT | MUX_MODE0)		/* lcd_data13.lcd_data13 */
+			AM33XX_IOPAD(0x8d8, PIN_OUTPUT | MUX_MODE0)		/* lcd_data14.lcd_data14 */
+			AM33XX_IOPAD(0x8dc, PIN_OUTPUT | MUX_MODE0)		/* lcd_data15.lcd_data15 */
+			AM33XX_IOPAD(0x8e0, PIN_OUTPUT_PULLDOWN | MUX_MODE0)	/* lcd_vsync.lcd_vsync */
+			AM33XX_IOPAD(0x8e4, PIN_OUTPUT_PULLDOWN | MUX_MODE0)	/* lcd_hsync.lcd_hsync */
+			AM33XX_IOPAD(0x8e8, PIN_OUTPUT_PULLDOWN | MUX_MODE0)	/* lcd_pclk.lcd_pclk */
+			AM33XX_IOPAD(0x8ec, PIN_OUTPUT_PULLDOWN | MUX_MODE0)	/* lcd_ac_bias_en.lcd_ac_bias_en */
+		>;
+	};
+	nxp_hdmi_bonelt_off_pins: nxp_hdmi_bonelt_off_pins {
+		pinctrl-single,pins = <
+			AM33XX_IOPAD(0x9b0, PIN_OUTPUT_PULLDOWN | MUX_MODE3)	/* xdma_event_intr0 */
+		>;
+	};
+
+	cpsw_default: cpsw_default {
+		pinctrl-single,pins = <
+			/* Slave 1 */
+			0x114 (PIN_OUTPUT_PULLDOWN | MUX_MODE2)	/* mii1_txen.rgmii1_tctl */
+			0x118 (PIN_INPUT_PULLDOWN | MUX_MODE2)	/* mii1_rxdv.rgmii1_rctl */
+			0x11c (PIN_OUTPUT_PULLDOWN | MUX_MODE2)	/* mii1_txd3.rgmii1_td3 */
+			0x120 (PIN_OUTPUT_PULLDOWN | MUX_MODE2)	/* mii1_txd2.rgmii1_td2 */
+			0x124 (PIN_OUTPUT_PULLDOWN | MUX_MODE2)	/* mii1_txd1.rgmii1_td1 */
+			0x128 (PIN_OUTPUT_PULLDOWN | MUX_MODE2)	/* mii1_txd0.rgmii1_td0 */
+			0x12c (PIN_OUTPUT_PULLDOWN | MUX_MODE2)	/* mii1_txclk.rgmii1_tclk */
+			0x130 (PIN_INPUT_PULLDOWN | MUX_MODE2)	/* mii1_rxclk.rgmii1_rclk */
+			0x134 (PIN_INPUT_PULLDOWN | MUX_MODE2)	/* mii1_rxd3.rgmii1_rd3 */
+			0x138 (PIN_INPUT_PULLDOWN | MUX_MODE2)	/* mii1_rxd2.rgmii1_rd2 */
+			0x13c (PIN_INPUT_PULLDOWN | MUX_MODE2)	/* mii1_rxd1.rgmii1_rd1 */
+			0x140 (PIN_INPUT_PULLDOWN | MUX_MODE2)	/* mii1_rxd0.rgmii1_rd0 */
+		>;
+	};
+
+	cpsw_sleep: cpsw_sleep {
+		pinctrl-single,pins = <
+			/* Slave 1 reset value */
+			0x114 (PIN_INPUT_PULLDOWN | MUX_MODE7)
+			0x118 (PIN_INPUT_PULLDOWN | MUX_MODE7)
+			0x11c (PIN_INPUT_PULLDOWN | MUX_MODE7)
+			0x120 (PIN_INPUT_PULLDOWN | MUX_MODE7)
+			0x124 (PIN_INPUT_PULLDOWN | MUX_MODE7)
+			0x128 (PIN_INPUT_PULLDOWN | MUX_MODE7)
+			0x12c (PIN_INPUT_PULLDOWN | MUX_MODE7)
+			0x130 (PIN_INPUT_PULLDOWN | MUX_MODE7)
+			0x134 (PIN_INPUT_PULLDOWN | MUX_MODE7)
+			0x138 (PIN_INPUT_PULLDOWN | MUX_MODE7)
+			0x13c (PIN_INPUT_PULLDOWN | MUX_MODE7)
+			0x140 (PIN_INPUT_PULLDOWN | MUX_MODE7)
+		>;
+	};
+
+	davinci_mdio_default: davinci_mdio_default {
+		pinctrl-single,pins = <
+			/* MDIO */
+			0x148 (PIN_INPUT_PULLUP | SLEWCTRL_FAST | MUX_MODE0)	/* mdio_data.mdio_data */
+			0x14c (PIN_OUTPUT_PULLUP | MUX_MODE0)			/* mdio_clk.mdio_clk */
+		>;
+	};
+
+	davinci_mdio_sleep: davinci_mdio_sleep {
+		pinctrl-single,pins = <
+			/* MDIO reset value */
+			0x148 (PIN_INPUT_PULLDOWN | MUX_MODE7)
+			0x14c (PIN_INPUT_PULLDOWN | MUX_MODE7)
+		>;
+	};
+
+	usb_hub_ctrl: usb_hub_ctrl {
+		pinctrl-single,pins = <
+			0x144 (PIN_OUTPUT_PULLUP | MUX_MODE7)	/* mcasp0_ahclkr.gpio3_17 */
+		>;
+	};
+
+	mpu6050_pins: pinmux_mpu6050_pins {
+		pinctrl-single,pins = <
+			0x168 (PIN_INPUT | MUX_MODE7)	/* spi0_sclk.gpio0_2 */
+		>;
+	};
+
+	lps3331ap_pins: pinmux_lps3331ap_pins {
+		pinctrl-single,pins = <
+			0x6C (PIN_INPUT | MUX_MODE7)	/* conf_gpmc_a11.gpio1_27 */
+		>;
+	};
+
+	mcasp0_pins: mcasp0_pins {
+		pinctrl-single,pins = <
+			AM33XX_IOPAD(0x9ac, PIN_INPUT_PULLUP | MUX_MODE0) /* mcasp0_ahcklx.mcasp0_ahclkx */
+			AM33XX_IOPAD(0x99c, PIN_OUTPUT_PULLDOWN | MUX_MODE2) /* mcasp0_ahclkr.mcasp0_axr2*/
+			AM33XX_IOPAD(0x994, PIN_OUTPUT_PULLUP | MUX_MODE0) /* mcasp0_fsx.mcasp0_fsx */
+			AM33XX_IOPAD(0x990, PIN_OUTPUT_PULLDOWN | MUX_MODE0) /* mcasp0_aclkx.mcasp0_aclkx */
+			AM33XX_IOPAD(0x86c, PIN_OUTPUT_PULLDOWN | MUX_MODE7) /* gpmc_a11.GPIO1_27 */
+		>;
+	};
+};
+
+&lcdc {
+	status = "okay";
+	port {
+		lcdc_0: endpoint@0 {
+			remote-endpoint = <&hdmi_0>;
+		};
+	};
+};
+
+&mac {
+	pinctrl-names = "default", "sleep";
+	pinctrl-0 = <&cpsw_default>;
+	pinctrl-1 = <&cpsw_sleep>;
+};
+
+&davinci_mdio {
+	pinctrl-names = "default", "sleep";
+	pinctrl-0 = <&davinci_mdio_default>;
+	pinctrl-1 = <&davinci_mdio_sleep>;
+};
+
+&cpsw_emac0 {
+	phy_id = <&davinci_mdio>, <0>;
+	phy-mode = "rgmii-txid";
+};
+
+&i2c0 {
+	tda19988: tda19988 {
+		compatible = "nxp,tda998x";
+		reg = <0x70>;
+
+		pinctrl-names = "default", "off";
+		pinctrl-0 = <&nxp_hdmi_bonelt_pins>;
+		pinctrl-1 = <&nxp_hdmi_bonelt_off_pins>;
+
+		#sound-dai-cells = <0>;
+		audio-ports = <	TDA998x_I2S	0x03>;
+
+		ports {
+			port@0 {
+				hdmi_0: endpoint@0 {
+					remote-endpoint = <&lcdc_0>;
+				};
+			};
+		};
+	};
+
+	lps331ap: lps331ap@5C {
+		compatible = "st,lps331ap";
+		st,drdy-int-pin = <1>;
+		reg = <0x5C>;
+		interrupt-parent = <&gpio1>;
+		interrupts = <27 IRQ_TYPE_EDGE_RISING>;
+	};
+
+	mpu6050: mpu6050@68 {
+		compatible = "invensense,mpu6050";
+		reg = <0x68>;
+		interrupt-parent = <&gpio0>;
+		interrupts = <2 IRQ_TYPE_EDGE_RISING>;
+		//orientation = <0xff 0 0 0 1 0 0 0 0xff>;
+	};
+};
+
+&mcasp0	{
+	#sound-dai-cells = <0>;
+	pinctrl-names = "default";
+	pinctrl-0 = <&mcasp0_pins>;
+	status = "okay";
+	op-mode = <0>;	/* MCASP_IIS_MODE */
+	tdm-slots = <2>;
+	serial-dir = <	/* 0: INACTIVE, 1: TX, 2: RX */
+			0 0 1 0
+		>;
+	tx-num-evt = <32>;
+	rx-num-evt = <32>;
+};
+
+/ {
+	clk_mcasp0_fixed: clk_mcasp0_fixed {
+		#clock-cells = <0>;
+		compatible = "fixed-clock";
+		clock-frequency = <24576000>;
+	};
+
+	clk_mcasp0: clk_mcasp0 {
+		#clock-cells = <0>;
+		compatible = "gpio-gate-clock";
+		clocks = <&clk_mcasp0_fixed>;
+		enable-gpios = <&gpio1 27 0>; /* BeagleBone Black Clk enable on GPIO1_27 */
+	};
+
+	sound {
+		compatible = "simple-audio-card";
+		simple-audio-card,name = "TI BeagleBone Black";
+		simple-audio-card,format = "i2s";
+		simple-audio-card,bitclock-master = <&dailink0_master>;
+		simple-audio-card,frame-master = <&dailink0_master>;
+
+		dailink0_master: simple-audio-card,cpu {
+			sound-dai = <&mcasp0>;
+			clocks = <&clk_mcasp0>;
+		};
+
+		simple-audio-card,codec {
+			sound-dai = <&tda19988>;
+		};
+	};
+};
diff --git b/arch/arm/boot/dts/am335x-som-common.dtsi b/arch/arm/boot/dts/am335x-som-common.dtsi
new file mode 100644
index 0000000..fb4399b
--- /dev/null
+++ b/arch/arm/boot/dts/am335x-som-common.dtsi
@@ -0,0 +1,465 @@
+/*
+ * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/ {
+
+	cpus {
+		cpu@0 {
+			cpu0-supply = <&dcdc2_fixed>;
+		};
+	};
+
+	memory {
+		device_type = "memory";
+		reg = <0x80000000 0x20000000>; /* 512 MB */
+	};
+
+	ocp {
+		uart0: serial@44e09000 {
+			pinctrl-names = "default";
+			pinctrl-0 = <&uart0_pins>;
+
+			status = "okay";
+		};
+		uart1: serial@48022000 {
+			pinctrl-names = "default";
+			pinctrl-0 = <&uart1_pins>;
+			status = "okay";
+
+		};
+		uart4: serial@481a8000 {
+			pinctrl-names = "default";
+			pinctrl-0 = <&uart4_pins>;
+			status = "okay";
+		};
+
+		epwmss0: epwmss@48300000 {
+			status = "okay";
+
+			ecap0: ecap@48300100 {
+				status = "okay";
+				pinctrl-names = "default", "sleep";
+				pinctrl-0 = <&ecap0_pins_default>;
+				pinctrl-1 = <&ecap0_pins_sleep>;
+			};
+		};
+
+		musb: usb@47400000 {
+			status = "okay";
+
+			control@44e10000 {
+				status = "okay";
+			};
+
+			usb-phy@47401300 {
+				status = "okay";
+			};
+
+			usb-phy@47401b00 {
+				status = "okay";
+			};
+
+			usb@47401000 {
+				status = "okay";
+				dr_mode = "otg";
+			};
+
+			usb@47401800 {
+				status = "okay";
+				dr_mode = "host";
+			};
+
+			dma-controller@07402000  {
+				status = "okay";
+			};
+		};
+
+		i2c0: i2c@44e0b000 {
+			pinctrl-names = "default";
+			pinctrl-0 = <&i2c0_pins>;
+			status = "okay";
+			clock-frequency = <100000>;
+
+			tps: tps@24 {
+				reg = <0x24>;
+			};
+		};
+	};
+
+	vmmcsd_fixed: fixedregulator@0 {
+		compatible = "regulator-fixed";
+		regulator-name = "vmmcsd_fixed";
+		regulator-min-microvolt = <3300000>;
+		regulator-max-microvolt = <3300000>;
+	};
+
+	dcdc2_fixed: fixedregulator@1 {
+		/* VDD_MPU voltage limits 0.95V - 1.325V with +/-4% tolerance */
+		 compatible = "regulator-fixed";
+		regulator-name = "dcdc2_fixed";
+
+		regulator-min-microvolt = <1378000>;
+		regulator-max-microvolt = <1378000>;
+		regulator-boot-on;
+		regulator-always-on;
+	};
+
+	leds {
+		pinctrl-names = "default";
+		pinctrl-0 = <&user_leds_s0>;
+
+		compatible = "gpio-leds";
+
+		led@1 {
+			label = "led1:green:heartbeat";
+			gpios = <&gpio0 19 GPIO_ACTIVE_HIGH>;
+			linux,default-trigger = "heartbeat";
+		};
+
+		led@2 {
+			label = "led2:red:heartbeat";
+			gpios = <&gpio3 20 GPIO_ACTIVE_HIGH>;
+			linux,default-trigger = "heartbeat";
+		};
+
+		led@3 {
+			label = "led3:yello:heartbeat";
+			gpios = <&gpio3 21 GPIO_ACTIVE_HIGH>;
+			linux,default-trigger = "heartbeat";
+		};
+
+		led@4 {
+			label = "bkl";
+			gpios = <&gpio3 19 GPIO_ACTIVE_HIGH>;
+			linux,default-trigger = "default-on";
+		};
+	};
+
+	backlight {
+		compatible = "pwm-backlight";
+		pwms = <&ecap0 0 500000 1>;
+		brightness-levels = <
+			0  1  2  3  4  5  6  7  8  9
+			10 11 12 13 14 15 16 17 18 19
+			20 21 22 23 24 25 26 27 28 29
+			30 31 32 33 34 35 36 37 38 39
+			40 41 42 43 44 45 46 47 48 49
+			50 51 52 53 54 55 56 57 58 59
+			60 61 62 63 64 65 66 67 68 69
+			70 71 72 73 74 75 76 77 78 79
+			80 81 82 83 84 85 86 87 88 89
+			90 91 92 93 94 95 96 97 98 99
+			100
+		>;
+		default-brightness-level = <50>;
+	};
+};
+
+&am33xx_pinmux {
+		pinctrl-names = "default";
+		pinctrl-0 = <&clkout2_pin>;
+
+		user_leds_s0: user_leds_s0 {
+			pinctrl-single,pins = <
+				0x1b0 (PIN_OUTPUT_PULLDOWN | MUX_MODE7)	/* xdma_event_intr0.gpio0_19 */
+				0x198 (PIN_OUTPUT_PULLDOWN | MUX_MODE7)	/* mcasp0_axr0.gpio3_20 */
+				0x1a8 (PIN_OUTPUT_PULLDOWN | MUX_MODE7)	/* mcasp0_axr1.gpio3_21 */
+				0x1a4 (PIN_OUTPUT_PULLDOWN | MUX_MODE7) /* mcasp0_fsr.gpio3[19], INPUT_PULLDOWN | MODE7 */
+			>;
+		};
+
+		i2c0_pins: pinmux_i2c0_pins {
+			pinctrl-single,pins = <
+				0x188 (PIN_INPUT_PULLUP | MUX_MODE0)	/* i2c0_sda.i2c0_sda */
+				0x18c (PIN_INPUT_PULLUP | MUX_MODE0)	/* i2c0_scl.i2c0_scl */
+			>;
+		};
+
+		uart0_pins: pinmux_uart0_pins {
+			pinctrl-single,pins = <
+				0x170 (PIN_INPUT_PULLUP | MUX_MODE0)	/* uart0_rxd.uart0_rxd */
+				0x174 (PIN_OUTPUT_PULLDOWN | MUX_MODE0)	/* uart0_txd.uart0_txd */
+			>;
+		};
+
+		uart1_pins: pinmux_uart1_pins {
+			pinctrl-single,pins = <
+				0x168 (PIN_INPUT_PULLUP | MUX_MODE1)
+				0x16c (PIN_OUTPUT_PULLDOWN | MUX_MODE1)
+			>;
+		};
+
+		uart4_pins: pinmux_uart4_pins {
+			pinctrl-single,pins = <
+				0x180 (PIN_INPUT_PULLUP | MUX_MODE0)
+				0x184 (PIN_OUTPUT_PULLDOWN | MUX_MODE0)
+			>;
+		};
+
+
+
+		clkout2_pin: pinmux_clkout2_pin {
+			pinctrl-single,pins = <
+				0x1b4 (PIN_OUTPUT_PULLDOWN | MUX_MODE7)	/* xdma_event_intr1.clkout2 */
+			>;
+		};
+
+		cpsw_default: cpsw_default {
+			pinctrl-single,pins = <
+				/* Slave 1 */
+				0x110 (PIN_INPUT_PULLUP | MUX_MODE0)	/* mii1_rxerr.mii1_rxerr */
+				0x114 (PIN_OUTPUT_PULLDOWN | MUX_MODE0)	/* mii1_txen.mii1_txen */
+				0x118 (PIN_INPUT_PULLUP | MUX_MODE0)	/* mii1_rxdv.mii1_rxdv */
+				0x11c (PIN_OUTPUT_PULLDOWN | MUX_MODE0)	/* mii1_txd3.mii1_txd3 */
+				0x120 (PIN_OUTPUT_PULLDOWN | MUX_MODE0)	/* mii1_txd2.mii1_txd2 */
+				0x124 (PIN_OUTPUT_PULLDOWN | MUX_MODE0)	/* mii1_txd1.mii1_txd1 */
+				0x128 (PIN_OUTPUT_PULLDOWN | MUX_MODE0)	/* mii1_txd0.mii1_txd0 */
+				0x12c (PIN_INPUT_PULLUP | MUX_MODE0)	/* mii1_txclk.mii1_txclk */
+				0x130 (PIN_INPUT_PULLUP | MUX_MODE0)	/* mii1_rxclk.mii1_rxclk */
+				0x134 (PIN_INPUT_PULLUP | MUX_MODE0)	/* mii1_rxd3.mii1_rxd3 */
+				0x138 (PIN_INPUT_PULLUP | MUX_MODE0)	/* mii1_rxd2.mii1_rxd2 */
+				0x13c (PIN_INPUT_PULLUP | MUX_MODE0)	/* mii1_rxd1.mii1_rxd1 */
+				0x140 (PIN_INPUT_PULLUP | MUX_MODE0)	/* mii1_rxd0.mii1_rxd0 */
+
+				0x040 (PIN_OUTPUT_PULLDOWN | MUX_MODE1) /* gpmc_a0.gmii2_txen, OUTPUT_PULLDOWN | MODE1 */
+				0x044 (PIN_INPUT_PULLDOWN | MUX_MODE1 ) /* gpmc_a1.gmii2_rxdv, INPUT_PULLDOWN | MODE1 */
+				0x048 (PIN_OUTPUT_PULLDOWN | MUX_MODE1) /* gpmc_a2.gmii2_txd3, OUTPUT_PULLDOWN | MODE1 */
+				0x04c (PIN_OUTPUT_PULLDOWN | MUX_MODE1) /* gpmc_a3.gmii2_txd2, OUTPUT_PULLDOWN | MODE1 */
+				0x050 (PIN_OUTPUT_PULLDOWN | MUX_MODE1) /* gpmc_a4.gmii2_txd1, OUTPUT_PULLDOWN | MODE1 */
+				0x054 (PIN_OUTPUT_PULLDOWN | MUX_MODE1) /* gpmc_a5.gmii2_txd0, OUTPUT_PULLDOWN | MODE1 */
+				0x058 (PIN_INPUT_PULLDOWN | MUX_MODE1 ) /* gpmc_a6.gmii2_txclk, INPUT_PULLDOWN | MODE1 */
+				0x05c (PIN_INPUT_PULLDOWN | MUX_MODE1 ) /* gpmc_a7.gmii2_rxclk, INPUT_PULLDOWN | MODE1 */
+				0x060 (PIN_INPUT_PULLDOWN | MUX_MODE1 ) /* gpmc_a8.gmii2_rxd3, INPUT_PULLDOWN | MODE1 */
+				0x064 (PIN_INPUT_PULLDOWN | MUX_MODE1 ) /* gpmc_a9.gmii2_rxd2, INPUT_PULLDOWN | MODE1 */
+				0x068 (PIN_INPUT_PULLDOWN | MUX_MODE1 ) /* gpmc_a10.gmii2_rxd1, INPUT_PULLDOWN | MODE1 */
+				0x06c (PIN_INPUT_PULLDOWN | MUX_MODE1 ) /* gpmc_a11.gmii2_rxd0, INPUT_PULLDOWN | MODE1 */
+				0x070 (PIN_INPUT_PULLUP | MUX_MODE1 )   /* gpmc_wait0.gmii2_crs, INPUT_PULLUP | MODE1 */
+				0x074 (PIN_INPUT_PULLUP | MUX_MODE1 )   /* gpmc_wpn.gmii2_rxer, INPUT_PULLUP | MODE1 */
+				0x078 (PIN_INPUT_PULLUP | MUX_MODE1 )   /* gpmc_ben1.gmii2_col, INPUT_PULLUP | MODE1 */
+			>;
+		};
+
+		cpsw_sleep: cpsw_sleep {
+			pinctrl-single,pins = <
+				/* Slave 1 reset value */
+				0x110 (PIN_INPUT_PULLDOWN | MUX_MODE7)
+				0x114 (PIN_INPUT_PULLDOWN | MUX_MODE7)
+				0x118 (PIN_INPUT_PULLDOWN | MUX_MODE7)
+				0x11c (PIN_INPUT_PULLDOWN | MUX_MODE7)
+				0x120 (PIN_INPUT_PULLDOWN | MUX_MODE7)
+				0x124 (PIN_INPUT_PULLDOWN | MUX_MODE7)
+				0x128 (PIN_INPUT_PULLDOWN | MUX_MODE7)
+				0x12c (PIN_INPUT_PULLDOWN | MUX_MODE7)
+				0x130 (PIN_INPUT_PULLDOWN | MUX_MODE7)
+				0x134 (PIN_INPUT_PULLDOWN | MUX_MODE7)
+				0x138 (PIN_INPUT_PULLDOWN | MUX_MODE7)
+				0x13c (PIN_INPUT_PULLDOWN | MUX_MODE7)
+				0x140 (PIN_INPUT_PULLDOWN | MUX_MODE7)
+
+				0x40 (PIN_INPUT_PULLDOWN | MUX_MODE7)
+				0x44 (PIN_INPUT_PULLDOWN | MUX_MODE7)
+				0x48 (PIN_INPUT_PULLDOWN | MUX_MODE7)
+				0x4c (PIN_INPUT_PULLDOWN | MUX_MODE7)
+				0x50 (PIN_INPUT_PULLDOWN | MUX_MODE7)
+				0x54 (PIN_INPUT_PULLDOWN | MUX_MODE7)
+				0x58 (PIN_INPUT_PULLDOWN | MUX_MODE7)
+				0x5c (PIN_INPUT_PULLDOWN | MUX_MODE7)
+				0x60 (PIN_INPUT_PULLDOWN | MUX_MODE7)
+				0x64 (PIN_INPUT_PULLDOWN | MUX_MODE7)
+				0x68 (PIN_INPUT_PULLDOWN | MUX_MODE7)
+				0x6c (PIN_INPUT_PULLDOWN | MUX_MODE7)
+				0x070 (PIN_INPUT_PULLDOWN | MUX_MODE7)
+				0x074 (PIN_INPUT_PULLDOWN | MUX_MODE7)
+				0x078 (PIN_INPUT_PULLDOWN | MUX_MODE7)
+			>;
+		};
+
+		davinci_mdio_default: davinci_mdio_default {
+			pinctrl-single,pins = <
+				/* MDIO */
+				0x148 (PIN_INPUT_PULLUP | SLEWCTRL_FAST | MUX_MODE0)	/* mdio_data.mdio_data */
+				0x14c (PIN_OUTPUT_PULLUP | MUX_MODE0)			/* mdio_clk.mdio_clk */
+			>;
+		};
+
+		davinci_mdio_sleep: davinci_mdio_sleep {
+			pinctrl-single,pins = <
+				/* MDIO reset value */
+				0x148 (PIN_INPUT_PULLDOWN | MUX_MODE7)
+				0x14c (PIN_INPUT_PULLDOWN | MUX_MODE7)
+			>;
+		};
+
+		mmc1_pins_default: pinmux_mmc1_pins {
+			pinctrl-single,pins = <
+				0x0F0 (PIN_INPUT_PULLUP | MUX_MODE0)	/* mmc0_dat3.mmc0_dat3 */
+				0x0F4 (PIN_INPUT_PULLUP | MUX_MODE0)	/* mmc0_dat2.mmc0_dat2 */
+				0x0F8 (PIN_INPUT_PULLUP | MUX_MODE0)	/* mmc0_dat1.mmc0_dat1 */
+				0x0FC (PIN_INPUT_PULLUP | MUX_MODE0)	/* mmc0_dat0.mmc0_dat0 */
+				0x100 (PIN_INPUT_PULLUP | MUX_MODE0)	/* mmc0_clk.mmc0_clk */
+				0x104 (PIN_INPUT_PULLUP | MUX_MODE0)	/* mmc0_cmd.mmc0_cmd */
+				0x1A0 (PIN_INPUT_PULLUP | MUX_MODE7)	/* mcasp0_aclkr.gpio3_18 */
+				0x160 (PIN_INPUT | MUX_MODE7)		/* spi0_cs1.gpio0_6 */
+			>;
+		};
+
+		mmc1_pins_sleep: pinmux_mmc1_pins_sleep {
+			pinctrl-single,pins = <
+				0x0F0 (PIN_INPUT_PULLDOWN | MUX_MODE7)
+				0x0F4 (PIN_INPUT_PULLDOWN | MUX_MODE7)
+				0x0F8 (PIN_INPUT_PULLDOWN | MUX_MODE7)
+				0x0FC (PIN_INPUT_PULLDOWN | MUX_MODE7)
+				0x100 (PIN_INPUT_PULLDOWN | MUX_MODE7)
+				0x104 (PIN_INPUT_PULLDOWN | MUX_MODE7)
+				0x1A0 (PIN_INPUT_PULLDOWN | MUX_MODE7)
+				0x160 (PIN_INPUT_PULLDOWN | MUX_MODE7)
+			>;
+		};
+
+		emmc_pins: pinmux_emmc_pins {
+			pinctrl-single,pins = <
+				0x80 (PIN_INPUT_PULLUP | MUX_MODE2) /* gpmc_csn1.mmc1_clk */
+				0x84 (PIN_INPUT_PULLUP | MUX_MODE2) /* gpmc_csn2.mmc1_cmd */
+				0x00 (PIN_INPUT_PULLUP | MUX_MODE1) /* gpmc_ad0.mmc1_dat0 */
+				0x04 (PIN_INPUT_PULLUP | MUX_MODE1) /* gpmc_ad1.mmc1_dat1 */
+				0x08 (PIN_INPUT_PULLUP | MUX_MODE1) /* gpmc_ad2.mmc1_dat2 */
+				0x0c (PIN_INPUT_PULLUP | MUX_MODE1) /* gpmc_ad3.mmc1_dat3 */
+				0x10 (PIN_INPUT_PULLUP | MUX_MODE1) /* gpmc_ad4.mmc1_dat4 */
+				0x14 (PIN_INPUT_PULLUP | MUX_MODE1) /* gpmc_ad5.mmc1_dat5 */
+				0x18 (PIN_INPUT_PULLUP | MUX_MODE1) /* gpmc_ad6.mmc1_dat6 */
+				0x1c (PIN_INPUT_PULLUP | MUX_MODE1) /* gpmc_ad7.mmc1_dat7 */
+			>;
+		};
+
+			ecap0_pins_default: backlight_pins {
+			pinctrl-single,pins = <
+				0x164 0x0       /* eCAP0_in_PWM0_out.eCAP0_in_PWM0_out MODE0 */
+			>;
+		};
+
+		ecap0_pins_sleep: ecap0_pins_sleep {
+			pinctrl-single,pins = <
+				0x164  (PULL_DISABLE | MUX_MODE7)       /* eCAP0_in_PWM0_out.eCAP0_in_PWM0_out */
+			>;
+		};
+		dcan0_default: dcan0_default_pins {
+			pinctrl-single,pins = <
+				0x178 0x0a      /* uart1_ctsn.dcan0_tx_mux2, OUTPUT | MODE2 */
+				0x17c 0x2a      /* uart1_rtsn.dcan0_rx_mux2, INPUT | MODE2 */
+			>;
+		};
+	};
+
+&tps {
+	compatible = "ti,tps65217";
+	regulators {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		dcdc1_reg: regulator@0 {
+			reg = <0>;
+			regulator-always-on;
+		};
+
+		dcdc2_reg: regulator@1 {
+			reg = <1>;
+			/* VDD_MPU voltage limits 0.95V - 1.325V with +/-4% tolerance */
+			regulator-name = "vdd_mpu";
+			regulator-min-microvolt = <925000>;
+			regulator-max-microvolt = <1378000>;
+			regulator-boot-on;
+			regulator-always-on;
+		};
+
+		dcdc3_reg: regulator@2 {
+			reg = <2>;
+			/* VDD_CORE voltage limits 0.95V - 1.1V with +/-4% tolerance */
+			regulator-name = "vdd_core";
+			regulator-min-microvolt = <925000>;
+			regulator-max-microvolt = <1150000>;
+			regulator-boot-on;
+			regulator-always-on;
+		};
+
+		ldo1_reg: regulator@3 {
+			reg = <3>;
+			regulator-always-on;
+		};
+
+		ldo2_reg: regulator@4 {
+			reg = <4>;
+			regulator-always-on;
+		};
+
+		ldo3_reg: regulator@5 {
+			reg = <5>;
+			regulator-always-on;
+		};
+
+		ldo4_reg: regulator@6 {
+			reg = <6>;
+			regulator-always-on;
+		};
+	};
+};
+
+&cpsw_emac0 {
+	phy_id = <&davinci_mdio>, <0>;
+	phy-mode = "mii";
+};
+
+&cpsw_emac1 {
+	phy_id = <&davinci_mdio>, <1>;
+	phy-mode = "mii";
+};
+
+&mac {
+	pinctrl-names = "default", "sleep";
+	pinctrl-0 = <&cpsw_default>;
+	pinctrl-1 = <&cpsw_sleep>;
+	slaves = <2>;
+	dual_emac = <1>;
+	status = "okay";
+};
+
+&davinci_mdio {
+	pinctrl-names = "default", "sleep";
+	pinctrl-0 = <&davinci_mdio_default>;
+	pinctrl-1 = <&davinci_mdio_sleep>;
+	status = "okay";
+};
+
+&mmc1 {
+	status = "okay";
+	bus-width = <0x4>;
+	pinctrl-names = "default", "sleep";
+	pinctrl-0 = <&mmc1_pins_default>;
+	pinctrl-1 = <&mmc1_pins_sleep>;
+	cd-gpios = <&gpio0 6 GPIO_ACTIVE_HIGH>;
+	cd-inverted;
+};
+
+&dcan0 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&dcan0_default>;
+	status = "okay";
+};
+
+&tscadc {
+	status = "okay";
+	tsc {
+		ti,wires = <4>;
+		ti,x-plate-resistance = <200>;
+		ti,coordinate-readouts = <5>;
+		ti,wire-config = <0x00 0x11 0x22 0x33>;
+	};
+
+	adc {
+		ti,adc-channels = <0 1 2 3>;
+	};
+};
diff --git a/arch/arm/boot/dts/am33xx.dtsi b/arch/arm/boot/dts/am33xx.dtsi
index d066a75..3d1d672 100644
--- a/arch/arm/boot/dts/am33xx.dtsi
+++ b/arch/arm/boot/dts/am33xx.dtsi
@@ -36,6 +36,9 @@
 		phy1 = &usb1_phy;
 		ethernet0 = &cpsw_emac0;
 		ethernet1 = &cpsw_emac1;
+		mmc0 = &mmc1;
+		mmc1 = &mmc2;
+		mmc2 = &mmc3;
 	};
 
 	cpus {
@@ -46,19 +49,7 @@
 			device_type = "cpu";
 			reg = <0>;
 
-			/*
-			 * To consider voltage drop between PMIC and SoC,
-			 * tolerance value is reduced to 2% from 4% and
-			 * voltage value is increased as a precaution.
-			 */
-			operating-points = <
-				/* kHz    uV */
-				720000  1285000
-				600000  1225000
-				500000  1125000
-				275000  1125000
-			>;
-			voltage-tolerance = <2>; /* 2 percentage */
+			operating-points-v2 = <&cpu0_opp_table>;
 
 			clocks = <&dpll_mpu_ck>;
 			clock-names = "cpu";
@@ -67,6 +58,80 @@
 		};
 	};
 
+	cpu0_opp_table: opp_table0 {
+		compatible = "operating-points-v2-ti-cpu";
+		ti,syscon-efuse = <&scm_conf 0x7fc 0x1fff 0>;
+		ti,syscon-rev = <&scm_conf 0x600>;
+
+		/*
+		 * The three following nodes are marked with opp-suspend
+		 * because the can not be enabled simultaneously on a
+		 * single SoC.
+		 */
+		opp50@300000000 {
+			opp-hz = /bits/ 64 <300000000>;
+			opp-microvolt = <950000 931000 969000>;
+			opp-supported-hw = <0x06 0x0010>;
+			opp-suspend;
+		};
+
+		opp100@275000000 {
+			opp-hz = /bits/ 64 <275000000>;
+			opp-microvolt = <1100000 1078000 1122000>;
+			opp-supported-hw = <0x01 0x00FF>;
+			opp-suspend;
+		};
+
+		opp100@300000000 {
+			opp-hz = /bits/ 64 <300000000>;
+			opp-microvolt = <1100000 1078000 1122000>;
+			opp-supported-hw = <0x06 0x0020>;
+			opp-suspend;
+		};
+
+		opp100@500000000 {
+			opp-hz = /bits/ 64 <500000000>;
+			opp-microvolt = <1100000 1078000 1122000>;
+			opp-supported-hw = <0x01 0xFFFF>;
+		};
+
+		opp100@600000000 {
+			opp-hz = /bits/ 64 <600000000>;
+			opp-microvolt = <1100000 1078000 1122000>;
+			opp-supported-hw = <0x06 0x0040>;
+		};
+
+		opp120@600000000 {
+			opp-hz = /bits/ 64 <600000000>;
+			opp-microvolt = <1200000 1176000 1224000>;
+			opp-supported-hw = <0x01 0xFFFF>;
+		};
+
+		opp120@720000000 {
+			opp-hz = /bits/ 64 <720000000>;
+			opp-microvolt = <1200000 1176000 1224000>;
+			opp-supported-hw = <0x06 0x0080>;
+		};
+
+		oppturbo@720000000 {
+			opp-hz = /bits/ 64 <720000000>;
+			opp-microvolt = <1260000 1234800 1285200>;
+			opp-supported-hw = <0x01 0xFFFF>;
+		};
+
+		oppturbo@800000000 {
+			opp-hz = /bits/ 64 <800000000>;
+			opp-microvolt = <1260000 1234800 1285200>;
+			opp-supported-hw = <0x06 0x0100>;
+		};
+
+		oppnitro@1000000000 {
+			opp-hz = /bits/ 64 <1000000000>;
+			opp-microvolt = <1325000 1298500 1351500>;
+			opp-supported-hw = <0x04 0x0200>;
+		};
+	};
+
 	pmu {
 		compatible = "arm,cortex-a8-pmu";
 		interrupts = <3>;
@@ -91,7 +156,7 @@
 	 * for the moment, just use a fake OCP bus entry to represent
 	 * the whole bus hierarchy.
 	 */
-	ocp {
+	ocp: ocp {
 		compatible = "simple-bus";
 		#address-cells = <1>;
 		#size-cells = <1>;
@@ -501,12 +566,25 @@
 			ti,timer-pwm;
 		};
 
+		pruss: pruss@4a300000 {
+			compatible = "ti,pruss-v2";
+			ti,hwmods = "pruss";
+			ti,deassert-hard-reset = "pruss", "pruss";
+			reg = <0x4a300000 0x080000>;
+			ti,pintc-offset = <0x20000>;
+			interrupt-parent = <&intc>;
+			status = "disabled";
+			interrupts = <20 21 22 23 24 25 26 27>;
+		};
+
 		rtc: rtc@44e3e000 {
 			compatible = "ti,am3352-rtc", "ti,da830-rtc";
 			reg = <0x44e3e000 0x1000>;
 			interrupts = <75
 				      76>;
 			ti,hwmods = "rtc";
+			ti,no-reset-on-init;
+			ti,no-idle-on-init;
 		};
 
 		spi0: spi@48030000 {
@@ -692,6 +770,16 @@
 				status = "disabled";
 			};
 
+			eqep0: eqep@0x48300180 {
+				compatible = "ti,am33xx-eqep";
+				reg = <0x48300180 0x80>;
+				clocks = <&l4ls_gclk>;
+				clock-names = "fck";
+				interrupt-parent = <&intc>;
+				interrupts = <79>;
+				status = "disabled";
+			};
+
 			ehrpwm0: pwm@48300200 {
 				compatible = "ti,am3352-ehrpwm",
 					     "ti,am33xx-ehrpwm";
@@ -726,6 +814,17 @@
 				status = "disabled";
 			};
 
+
+			eqep1: eqep@0x48302180 {
+				compatible = "ti,am33xx-eqep";
+				reg = <0x48302180 0x80>;
+				clocks = <&l4ls_gclk>;
+				clock-names = "fck";
+				interrupt-parent = <&intc>;
+				interrupts = <88>;
+				status = "disabled";
+			};
+
 			ehrpwm1: pwm@48302200 {
 				compatible = "ti,am3352-ehrpwm",
 					     "ti,am33xx-ehrpwm";
@@ -760,6 +859,16 @@
 				status = "disabled";
 			};
 
+			eqep2: eqep@0x48304180 {
+				compatible = "ti,am33xx-eqep";
+				reg = <0x48304180 0x80>;
+				clocks = <&l4ls_gclk>;
+				clock-names = "fck";
+				interrupt-parent = <&intc>;
+				interrupts = <89>;
+				status = "disabled";
+			};
+
 			ehrpwm2: pwm@48304200 {
 				compatible = "ti,am3352-ehrpwm",
 					     "ti,am33xx-ehrpwm";
diff --git a/arch/arm/boot/dts/am4372.dtsi b/arch/arm/boot/dts/am4372.dtsi
index 9435b6c..e2d77fd 100644
--- a/arch/arm/boot/dts/am4372.dtsi
+++ b/arch/arm/boot/dts/am4372.dtsi
@@ -50,15 +50,15 @@
 			clock-names = "cpu";
 
 			operating-points-v2 = <&cpu0_opp_table>;
-			ti,syscon-efuse = <&scm_conf 0x610 0x3f 0>;
-			ti,syscon-rev = <&scm_conf 0x600>;
 
 			clock-latency = <300000>; /* From omap-cpufreq driver */
 		};
 	};
 
 	cpu0_opp_table: opp_table0 {
-		compatible = "operating-points-v2";
+		compatible = "operating-points-v2-ti-cpu";
+		ti,syscon-efuse = <&scm_conf 0x610 0x3f 0>;
+		ti,syscon-rev = <&scm_conf 0x600>;
 
 		opp50@300000000 {
 			opp-hz = /bits/ 64 <300000000>;
diff --git a/arch/arm/boot/dts/am57xx-beagle-x15-common.dtsi b/arch/arm/boot/dts/am57xx-beagle-x15-common.dtsi
index 6df7829..a2e4c29 100644
--- a/arch/arm/boot/dts/am57xx-beagle-x15-common.dtsi
+++ b/arch/arm/boot/dts/am57xx-beagle-x15-common.dtsi
@@ -190,6 +190,11 @@
 		>;
 	};
 };
+
+&bb2d {
+	status = "okay";
+};
+
 &i2c1 {
 	status = "okay";
 	clock-frequency = <400000>;
diff --git a/arch/arm/boot/dts/dra7.dtsi b/arch/arm/boot/dts/dra7.dtsi
index 064d84f..6e63e28 100644
--- a/arch/arm/boot/dts/dra7.dtsi
+++ b/arch/arm/boot/dts/dra7.dtsi
@@ -81,11 +81,7 @@
 			compatible = "arm,cortex-a15";
 			reg = <0>;
 
-			operating-points = <
-				/* kHz    uV */
-				1000000	1060000
-				1176000	1160000
-				>;
+			operating-points-v2 = <&cpu0_opp_table>;
 
 			clocks = <&dpll_mpu_ck>;
 			clock-names = "cpu";
@@ -99,6 +95,25 @@
 		};
 	};
 
+	cpu0_opp_table: opp_table0 {
+		compatible = "operating-points-v2-ti-cpu";
+		ti,syscon-efuse = <&scm_wkup 0x20c 0xf80000 19>;
+		ti,syscon-rev = <&scm_wkup 0x204>;
+
+		opp_nom@1000000000 {
+			opp-hz = /bits/ 64 <1000000000>;
+			opp-microvolt = <1060000 850000 1150000>;
+			opp-supported-hw = <0xFF 0x01>;
+			opp-suspend;
+		};
+
+		opp_od@1176000000 {
+			opp-hz = /bits/ 64 <1176000000>;
+			opp-microvolt = <1160000 885000 1160000>;
+			opp-supported-hw = <0xFF 0x02>;
+		};
+	};
+
 	/*
 	 * The soc node represents the soc top level view. It is used for IPs
 	 * that are not memory mapped in the MPU view or for the MPU itself.
@@ -401,6 +416,13 @@
 			reg = <0x40d00000 0x100>;
 		};
 
+		dra7_iodelay_core: padconf@4844a000 {
+			compatible = "ti,dra7-iodelay";
+			reg = <0x4844a000 0x0d1c>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+		};
+
 		sdma: dma-controller@4a056000 {
 			compatible = "ti,omap4430-sdma";
 			reg = <0x4a056000 0x1000>;
@@ -959,6 +981,16 @@
 			ti,hwmods = "dmm";
 		};
 
+		bb2d: bb2d@59000000 {
+			compatible = "ti,dra7-bb2d","vivante,gc";
+			reg = <0x59000000 0x0700>;
+			interrupts = <GIC_SPI 120 IRQ_TYPE_LEVEL_HIGH>;
+			ti,hwmods = "bb2d";
+			clocks = <&dpll_core_h24x2_ck>;
+			clock-names = "fclk";
+			status = "disabled";
+		};
+
 		i2c1: i2c@48070000 {
 			compatible = "ti,omap4-i2c";
 			reg = <0x48070000 0x100>;
@@ -1970,6 +2002,12 @@
 		};
 	};
 
+	gpu-subsystem {
+		compatible = "vivante,gc-gpu-subsystem";
+		cores = <&bb2d>;
+		status = "okay";
+	};
+
 	thermal_zones: thermal-zones {
 		#include "omap4-cpu-thermal.dtsi"
 		#include "omap5-gpu-thermal.dtsi"
diff --git a/arch/arm/boot/dts/dra74x.dtsi b/arch/arm/boot/dts/dra74x.dtsi
index 0a78347..24e6746 100644
--- a/arch/arm/boot/dts/dra74x.dtsi
+++ b/arch/arm/boot/dts/dra74x.dtsi
@@ -17,6 +17,7 @@
 			device_type = "cpu";
 			compatible = "arm,cortex-a15";
 			reg = <1>;
+			operating-points-v2 = <&cpu0_opp_table>;
 		};
 	};
 
@@ -79,6 +80,10 @@
 	};
 };
 
+&cpu0_opp_table {
+	opp-shared;
+};
+
 &dss {
 	reg = <0x58000000 0x80>,
 	      <0x58004054 0x4>,
diff --git b/arch/arm/boot/dts/exynos5422-artik10-eval.dts b/arch/arm/boot/dts/exynos5422-artik10-eval.dts
new file mode 100644
index 0000000..1001255
--- /dev/null
+++ b/arch/arm/boot/dts/exynos5422-artik10-eval.dts
@@ -0,0 +1,278 @@
+/*
+ * SAMSUNG ARTIK10 board device tree source
+ *
+ * Copyright (c) 2015 Samsung Electronics Co., Ltd.
+ *             http://www.samsung.com
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+*/
+
+/dts-v1/;
+
+#include <dt-bindings/clock/samsung,s2mps11.h>
+#include <dt-bindings/interrupt-controller/irq.h>
+#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/sound/samsung-i2s.h>
+#include "exynos5800.dtsi"
+#include "exynos5422-cpus.dtsi"
+#include "exynos-mfc-reserved-memory.dtsi"
+
+/ {
+	model = "Samsung ARTIK10 board based on EXYNOS5422";
+	compatible = "samsung,artik10", "samsung,exynos5422", "samsung,exynos5";
+
+	memory@40000000 {
+		device_type = "memory";
+		reg = <0x40000000 0x7EA00000>;
+	};
+
+	chosen {
+		stdout-path = "serial2:115200n8";
+	};
+
+	firmware@02073000 {
+		compatible = "samsung,secure-firmware";
+		reg = <0x02073000 0x1000>;
+	};
+
+	fixed-rate-clocks {
+		oscclk {
+			compatible = "samsung,exynos5420-oscclk";
+			clock-frequency = <24000000>;
+		};
+	};
+
+	rtc {
+		status = "okay";
+	};
+};
+
+&cpu0 {
+	cpu-supply = <&buck6_reg>;
+};
+
+&cpu4 {
+	cpu-supply = <&buck2_reg>;
+};
+
+&pinctrl_0 {
+	s2mps11_irq: s2mps11-irq {
+		samsung,pins = "gpx3-2";
+		samsung,pin-pud = <3>;
+		samsung,pin-drv = <3>;
+	};
+};
+
+&hsi2c_4 {
+	clock-frequency = <400000>;
+	status = "okay";
+
+	s2mps11_pmic@66 {
+		compatible = "samsung,s2mps11-pmic";
+		interrupt-parent = <&gpx3>;
+		interrupts = <2 IRQ_TYPE_EDGE_FALLING>;
+		reg = <0x66>;
+
+
+		pinctrl-names = "default";
+		pinctrl-0 = <&s2mps11_irq>;
+
+		s2mps11_osc: clocks {
+			#clock-cells = <1>;
+			clock-output-names = "s2mps11_ap",
+					"s2mps11_cp", "s2mps11_bt";
+		};
+
+		regulators {
+			ldo4_reg: LDO4 {
+				regulator-name = "vdd_adc";
+				regulator-min-microvolt = <1800000>;
+				regulator-max-microvolt = <1800000>;
+				regulator-always-on;
+			};
+
+			ldo6_reg: LDO6 {
+				regulator-name = "vdd_ldo6";
+				regulator-min-microvolt = <1000000>;
+				regulator-max-microvolt = <1000000>;
+				regulator-always-on;
+			};
+
+			ldo7_reg: LDO7 {
+				regulator-name = "vdd_ldo7";
+				regulator-min-microvolt = <1800000>;
+				regulator-max-microvolt = <1800000>;
+				regulator-always-on;
+			};
+
+			ldo13_reg: LDO13 {
+				regulator-name = "vqmmc";
+				regulator-min-microvolt = <1800000>;
+				regulator-max-microvolt = <3300000>;
+			};
+
+			ldo17_reg: LDO17 {
+				regulator-name = "vdd_ldo17";
+				regulator-min-microvolt = <2800000>;
+				regulator-max-microvolt = <2800000>;
+			};
+
+			ldo18_reg: LDO18 {
+				regulator-name = "vdd_ldo18";
+				regulator-min-microvolt = <1800000>;
+				regulator-max-microvolt = <1800000>;
+				regulator-always-on;
+			};
+
+			ldo19_reg: LDO19 {
+				regulator-name = "vmmc";
+				regulator-min-microvolt = <1800000>;
+				regulator-max-microvolt = <1800000>;
+				regulator-always-on;
+			};
+
+			ldo20_reg: LDO20 {
+				regulator-name = "vdd_ldo20";
+				regulator-min-microvolt = <1800000>;
+				regulator-max-microvolt = <1800000>;
+				regulator-always-on;
+			};
+
+			ldo23_reg: LDO23 {
+				regulator-name = "vdd_mifs";
+				regulator-min-microvolt = <800000>;
+				regulator-max-microvolt = <1100000>;
+				regulator-always-on;
+			};
+
+			ldo24_reg: LDO24 {
+				regulator-name = "vdd_ldo24";
+				regulator-min-microvolt = <2400000>;
+				regulator-max-microvolt = <2400000>;
+			};
+
+			ldo25_reg: LDO25 {
+				regulator-name = "vdd_ldo25";
+				regulator-min-microvolt = <1800000>;
+				regulator-max-microvolt = <1800000>;
+			};
+
+			ldo27_reg: LDO27 {
+				regulator-name = "vdd_g3ds";
+				regulator-min-microvolt = <800000>;
+				regulator-max-microvolt = <1100000>;
+				regulator-always-on;
+			};
+
+			ldo28_reg: LDO28 {
+				regulator-name = "vdd_ldo28";
+				regulator-min-microvolt = <2800000>;
+				regulator-max-microvolt = <2800000>;
+			};
+
+			ldo32_reg: LDO32 {
+				regulator-name = "vdd_lcd_1v8";
+				regulator-min-microvolt = <1800000>;
+				regulator-max-microvolt = <1800000>;
+			};
+
+			ldo38_reg: LDO38 {
+				regulator-name = "vdd_ldo38";
+				regulator-min-microvolt = <2800000>;
+				regulator-max-microvolt = <2800000>;
+				regulator-always-on;
+			};
+
+			buck1_reg: BUCK1 {
+				regulator-name = "vdd_mif";
+				regulator-min-microvolt = <700000>;
+				regulator-max-microvolt = <1300000>;
+				regulator-always-on;
+			};
+
+			buck2_reg: BUCK2 {
+				regulator-name = "vdd_eagle";
+				regulator-min-microvolt = <800000>;
+				regulator-max-microvolt = <1500000>;
+				regulator-always-on;
+			};
+
+			buck3_reg: BUCK3 {
+				regulator-name = "vdd_int";
+				regulator-min-microvolt = <800000>;
+				regulator-max-microvolt = <1400000>;
+				regulator-always-on;
+			};
+
+			buck4_reg: BUCK4 {
+				regulator-name = "vdd_g3d";
+				regulator-min-microvolt = <700000>;
+				regulator-max-microvolt = <1400000>;
+				regulator-always-on;
+			};
+
+			buck6_reg: BUCK6 {
+				regulator-name = "vdd_kfc";
+				regulator-min-microvolt = <800000>;
+				regulator-max-microvolt = <1500000>;
+				regulator-always-on;
+			};
+
+			buck10_reg: BUCK10 {
+				regulator-name = "vdd_cam_isp_1.0v";
+				regulator-min-microvolt = <750000>;
+				regulator-max-microvolt = <1500000>;
+				regulator-always-on;
+			};
+		};
+	};
+};
+
+&mmc_0 {
+	status = "okay";
+	broken-cd;
+	card-detect-delay = <200>;
+	samsung,dw-mshc-ciu-div = <3>;
+	samsung,dw-mshc-sdr-timing = <0 4>;
+	samsung,dw-mshc-ddr-timing = <0 2>;
+	samsung,dw-mshc-hs400-timing = <0 2>;
+	samsung,read-strobe-delay = <90>;
+	pinctrl-names = "default";
+	pinctrl-0 = <&sd0_clk &sd0_cmd &sd0_bus1 &sd0_bus4 &sd0_bus8
+		     &sd0_rclk>;
+	bus-width = <8>;
+	cap-mmc-highspeed;
+	keep-power-in-suspend;
+	non-removable;
+};
+
+&mmc_2 {
+	status = "okay";
+	card-detect-delay = <200>;
+	samsung,dw-mshc-ciu-div = <3>;
+	samsung,dw-mshc-sdr-timing = <2 3>;
+	samsung,dw-mshc-ddr-timing = <1 2>;
+	pinctrl-names = "default";
+	pinctrl-0 = <&sd2_clk &sd2_cmd &sd2_cd &sd2_bus1 &sd2_bus4>;
+	bus-width = <4>;
+	cap-sd-highspeed;
+	vmmc-supply = <&ldo28_reg>;
+};
+
+&nocp_mem0_0 {
+	status = "okay";
+};
+
+&nocp_mem0_1 {
+	status = "okay";
+};
+
+&nocp_mem1_0 {
+	status = "okay";
+};
+
+&nocp_mem1_1 {
+	status = "okay";
+};
diff --git b/arch/arm/boot/dts/imx6q-ccimx6sbc.dts b/arch/arm/boot/dts/imx6q-ccimx6sbc.dts
new file mode 100644
index 0000000..d249240
--- /dev/null
+++ b/arch/arm/boot/dts/imx6q-ccimx6sbc.dts
@@ -0,0 +1,303 @@
+/*
+ * Copyright (C) 2015 Robert Nelson (robertcnelson@gmail.com)
+ *
+ * This file is dual-licensed: you can use it either under the terms
+ * of the GPL or the X11 license, at your option. Note that this dual
+ * licensing only applies to this file, and not this project as a
+ * whole.
+ *
+ *  a) This file is free software; you can redistribute it and/or
+ *     modify it under the terms of the GNU General Public License as
+ *     published by the Free Software Foundation; either version 2 of the
+ *     License.
+ *
+ *     This file is distributed in the hope that it will be useful
+ *     but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *     GNU General Public License for more details.
+ *
+ * Or, alternatively
+ *
+ *  b) Permission is hereby granted, free of charge, to any person
+ *     obtaining a copy of this software and associated documentation
+ *     files (the "Software"), to deal in the Software without
+ *     restriction, including without limitation the rights to use
+ *     copy, modify, merge, publish, distribute, sublicense, and/or
+ *     sell copies of the Software, and to permit persons to whom the
+ *     Software is furnished to do so, subject to the following
+ *     conditions:
+ *
+ *     The above copyright notice and this permission notice shall be
+ *     included in all copies or substantial portions of the Software.
+ *
+ *     THE SOFTWARE IS PROVIDED , WITHOUT WARRANTY OF ANY KIND
+ *     EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ *     OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ *     NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ *     HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY
+ *     WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ *     FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ *     OTHER DEALINGS IN THE SOFTWARE.
+ */
+/dts-v1/;
+
+#include "imx6q.dtsi"
+#include <dt-bindings/gpio/gpio.h>
+
+/ {
+	model = "Digi ConnectCore-i.MX6 SBC Board";
+	compatible = "digi,connectcore/q", "fsl,imx6q";
+
+	chosen {
+		stdout-path = &uart4;
+	};
+
+	memory {
+		reg = <0x10000000 0x40000000>;
+	};
+
+	regulators {
+		compatible = "simple-bus";
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		reg_usbh1_reset: regulator@1 {
+			compatible = "regulator-fixed";
+			reg = <1>;
+			pinctrl-names = "default";
+			pinctrl-0 = <&pinctrl_usbh1>;
+			regulator-name = "usbh1_reset";
+			regulator-min-microvolt = <5000000>;
+			regulator-max-microvolt = <5000000>;
+			gpio = <&gpio3 10 GPIO_ACTIVE_HIGH>;
+			enable-active-high;
+		};
+
+		reg_usb_otg_vbus: regulator@2 {
+			compatible = "regulator-fixed";
+			reg = <2>;
+			pinctrl-names = "default";
+			pinctrl-0 = <&pinctrl_usbotg>;
+			regulator-name = "usb_otg_vbus";
+			regulator-min-microvolt = <5000000>;
+			regulator-max-microvolt = <5000000>;
+			gpio = <&gpio3 22 GPIO_ACTIVE_HIGH>;
+			enable-active-high;
+		};
+	};
+};
+
+&fec {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_enet>;
+	phy-mode = "rgmii";
+	phy-reset-gpios = <&gpio1 25 GPIO_ACTIVE_LOW>;
+	status = "okay";
+};
+
+&hdmi {
+	ddc-i2c-bus = <&i2c3>;
+	status = "okay";
+};
+
+&i2c2 {
+	clock-frequency = <400000>;
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_i2c2>;
+	status = "okay";
+
+	pmic@58 {
+		compatible = "dlg,da9063";
+		reg = <0x58>;
+		interrupt-parent = <&gpio1>;
+		interrupts = <17 0x8>; /* active-low GPIO0_17 */
+
+		regulators {
+			vdd_3v3_reg: bperi {
+				regulator-min-microvolt = <3300000>;
+				regulator-max-microvolt = <3300000>;
+				regulator-always-on;
+			};
+
+			ldo3_reg: ldo3 {
+				regulator-min-microvolt = <3300000>;
+				regulator-max-microvolt = <3300000>;
+				regulator-always-on;
+			};
+
+			ldo4_reg: ldo4 {
+				regulator-min-microvolt = <3300000>;
+				regulator-max-microvolt = <3300000>;
+				regulator-always-on;
+			};
+
+			ldo6_reg: ldo6 {
+				regulator-min-microvolt = <3300000>;
+				regulator-max-microvolt = <3300000>;
+				regulator-always-on;
+			};
+
+			ldo7_reg: ldo7 {
+				regulator-min-microvolt = <1800000>;
+				regulator-max-microvolt = <1800000>;
+				regulator-always-on;
+			};
+
+			ldo8_reg: ldo8 {
+				regulator-min-microvolt = <3300000>;
+				regulator-max-microvolt = <3300000>;
+				regulator-always-on;
+			};
+		};
+	};
+};
+
+&i2c3 {
+	clock-frequency = <100000>;
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_i2c3>;
+	status = "okay";
+};
+
+&iomuxc {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_hog>;
+
+	imx6qdl-ccimx6sbc {
+		pinctrl_hog: hoggrp {
+			fsl,pins = <
+				/* da9063*/
+				MX6QDL_PAD_GPIO_17__GPIO7_IO12		0x80000000
+			>;
+		};
+
+		pinctrl_enet: enetgrp {
+			fsl,pins = <
+				MX6QDL_PAD_ENET_MDIO__ENET_MDIO		0x100b0
+				MX6QDL_PAD_ENET_MDC__ENET_MDC		0x100b0
+				MX6QDL_PAD_RGMII_TXC__RGMII_TXC		0x100b0
+				MX6QDL_PAD_RGMII_TD0__RGMII_TD0		0x100b0
+				MX6QDL_PAD_RGMII_TD1__RGMII_TD1		0x100b0
+				MX6QDL_PAD_RGMII_TD2__RGMII_TD2		0x100b0
+				MX6QDL_PAD_RGMII_TD3__RGMII_TD3		0x100b0
+				MX6QDL_PAD_RGMII_TX_CTL__RGMII_TX_CTL	0x100b0
+				MX6QDL_PAD_ENET_REF_CLK__ENET_TX_CLK	0x100b0
+				MX6QDL_PAD_RGMII_RXC__RGMII_RXC		0x1b0b0
+				MX6QDL_PAD_RGMII_RD0__RGMII_RD0		0x1b0b0
+				MX6QDL_PAD_RGMII_RD1__RGMII_RD1		0x1b0b0
+				MX6QDL_PAD_RGMII_RD2__RGMII_RD2		0x1b0b0
+				MX6QDL_PAD_RGMII_RD3__RGMII_RD3		0x1b0b0
+				MX6QDL_PAD_RGMII_RX_CTL__RGMII_RX_CTL	0x1b0b0
+				/* Phy reset */
+				MX6QDL_PAD_ENET_CRS_DV__GPIO1_IO25	0x000b0
+			>;
+		};
+
+		pinctrl_i2c2: i2c2grp {
+			fsl,pins = <
+				MX6QDL_PAD_KEY_COL3__I2C2_SCL		0x4001b8b1
+				MX6QDL_PAD_KEY_ROW3__I2C2_SDA		0x4001b8b1
+			>;
+		};
+
+		pinctrl_i2c3: i2c3grp {
+			fsl,pins = <
+				MX6QDL_PAD_GPIO_5__I2C3_SCL		0x4001b8b1
+				MX6QDL_PAD_GPIO_6__I2C3_SDA		0x4001b8b1
+			>;
+		};
+
+		pinctrl_uart4: uart4grp {
+			fsl,pins = <
+				MX6QDL_PAD_KEY_COL0__UART4_TX_DATA	0x1b0b1
+				MX6QDL_PAD_KEY_ROW0__UART4_RX_DATA	0x1b0b1
+			>;
+		};
+
+		pinctrl_usbh1: usbh1grp {
+			fsl,pins = <
+				/* need to force low for hub reset */
+				MX6QDL_PAD_EIM_DA10__GPIO3_IO10		0x10b0
+			>;
+		};
+
+		pinctrl_usbotg: usbotggrp {
+			fsl,pins = <
+				MX6QDL_PAD_GPIO_1__USB_OTG_ID		0x17059
+				MX6QDL_PAD_EIM_D21__USB_OTG_OC		0x1b0b0
+				/* power enable, high active */
+				MX6QDL_PAD_EIM_D22__GPIO3_IO22		0x10b0
+			>;
+		};
+
+		pinctrl_usdhc2: usdhc2grp {
+			fsl,pins = <
+				MX6QDL_PAD_SD2_CMD__SD2_CMD		0x17059
+				MX6QDL_PAD_SD2_CLK__SD2_CLK		0x10059
+				MX6QDL_PAD_SD2_DAT0__SD2_DATA0		0x17059
+				MX6QDL_PAD_SD2_DAT1__SD2_DATA1		0x17059
+				MX6QDL_PAD_SD2_DAT2__SD2_DATA2		0x17059
+				MX6QDL_PAD_SD2_DAT3__SD2_DATA3		0x17059
+			>;
+		};
+
+		pinctrl_usdhc4: usdhc4grp {
+			fsl,pins = <
+				MX6QDL_PAD_SD4_CMD__SD4_CMD		0x17059
+				MX6QDL_PAD_SD4_CLK__SD4_CLK		0x10059
+				MX6QDL_PAD_SD4_DAT0__SD4_DATA0		0x17059
+				MX6QDL_PAD_SD4_DAT1__SD4_DATA1		0x17059
+				MX6QDL_PAD_SD4_DAT2__SD4_DATA2		0x17059
+				MX6QDL_PAD_SD4_DAT3__SD4_DATA3		0x17059
+				MX6QDL_PAD_SD4_DAT4__SD4_DATA4		0x17059
+				MX6QDL_PAD_SD4_DAT5__SD4_DATA5		0x17059
+				MX6QDL_PAD_SD4_DAT6__SD4_DATA6		0x17059
+				MX6QDL_PAD_SD4_DAT7__SD4_DATA7		0x17059
+			>;
+		};
+	};
+};
+
+&sata {
+	status = "okay";
+};
+
+&ssi1 {
+	status = "okay";
+};
+
+&uart4 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_uart4>;
+	status = "okay";
+};
+
+&usbh1 {
+	vbus-supply = <&reg_usbh1_reset>;
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_usbh1>;
+	status = "okay";
+};
+
+&usbotg {
+	vbus-supply = <&reg_usb_otg_vbus>;
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_usbotg>;
+	status = "okay";
+};
+
+&usdhc2 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_usdhc2>;
+	bus-width = <4>;
+	broken-cd; /* cd & wp, is not wired up on this board */
+	status = "okay";
+};
+
+&usdhc4 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_usdhc4>;
+	bus-width = <8>;
+	non-removable;
+	status = "okay";
+};
diff --git a/arch/arm/boot/dts/imx6q-evi.dts b/arch/arm/boot/dts/imx6q-evi.dts
index 6de21ff..3277a06 100644
--- a/arch/arm/boot/dts/imx6q-evi.dts
+++ b/arch/arm/boot/dts/imx6q-evi.dts
@@ -54,18 +54,6 @@
 		reg = <0x10000000 0x40000000>;
 	};
 
-	reg_usbh1_vbus: regulator-usbhubreset {
-		compatible = "regulator-fixed";
-		regulator-name = "usbh1_vbus";
-		regulator-min-microvolt = <5000000>;
-		regulator-max-microvolt = <5000000>;
-		enable-active-high;
-		startup-delay-us = <2>;
-		pinctrl-names = "default";
-		pinctrl-0 = <&pinctrl_usbh1_hubreset>;
-		gpio = <&gpio7 12 GPIO_ACTIVE_HIGH>;
-	};
-
 	reg_usb_otg_vbus: regulator-usbotgvbus {
 		compatible = "regulator-fixed";
 		regulator-name = "usb_otg_vbus";
@@ -207,12 +195,18 @@
 };
 
 &usbh1 {
-	vbus-supply = <&reg_usbh1_vbus>;
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_usbh1>;
 	dr_mode = "host";
 	disable-over-current;
 	status = "okay";
+
+	usb2415host: hub@1 {
+		compatible = "usb424,2513";
+		reg = <1>;
+		reset-gpios = <&gpio7 12 GPIO_ACTIVE_LOW>;
+		reset-duration-us = <3000>;
+	};
 };
 
 &usbotg {
@@ -471,11 +465,6 @@
 			MX6QDL_PAD_GPIO_3__USB_H1_OC 0x1b0b0
 			/* usbh1_b OC */
 			MX6QDL_PAD_GPIO_0__GPIO1_IO00 0x1b0b0
-		>;
-	};
-
-	pinctrl_usbh1_hubreset: usbh1hubresetgrp {
-		fsl,pins = <
 			MX6QDL_PAD_GPIO_17__GPIO7_IO12 0x1b0b0
 		>;
 	};
diff --git a/arch/arm/boot/dts/imx6qdl-udoo.dtsi b/arch/arm/boot/dts/imx6qdl-udoo.dtsi
index c96c91d..a173de2 100644
--- a/arch/arm/boot/dts/imx6qdl-udoo.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-udoo.dtsi
@@ -9,6 +9,8 @@
  *
  */
 
+#include <dt-bindings/gpio/gpio.h>
+
 / {
 	aliases {
 		backlight = &backlight;
@@ -58,17 +60,6 @@
 		#address-cells = <1>;
 		#size-cells = <0>;
 
-		reg_usb_h1_vbus: regulator@0 {
-			compatible = "regulator-fixed";
-			reg = <0>;
-			regulator-name = "usb_h1_vbus";
-			regulator-min-microvolt = <5000000>;
-			regulator-max-microvolt = <5000000>;
-			enable-active-high;
-			startup-delay-us = <2>; /* USB2415 requires a POR of 1 us minimum */
-			gpio = <&gpio7 12 0>;
-		};
-
 		reg_panel: regulator@1 {
 			compatible = "regulator-fixed";
 			reg = <1>;
@@ -188,7 +179,7 @@
 
 		pinctrl_usbh: usbhgrp {
 			fsl,pins = <
-				MX6QDL_PAD_GPIO_17__GPIO7_IO12 0x80000000
+				MX6QDL_PAD_GPIO_17__GPIO7_IO12	0x1b0b0
 				MX6QDL_PAD_NANDF_CS2__CCM_CLKO2 0x130b0
 			>;
 		};
@@ -259,9 +250,16 @@
 &usbh1 {
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_usbh>;
-	vbus-supply = <&reg_usb_h1_vbus>;
-	clocks = <&clks IMX6QDL_CLK_CKO>;
 	status = "okay";
+
+	usb2415: hub@1 {
+		compatible = "usb424,2514";
+		reg = <1>;
+
+		clocks = <&clks IMX6QDL_CLK_CKO>;
+		reset-gpios = <&gpio7 12 GPIO_ACTIVE_LOW>;
+		reset-duration-us = <3000>;
+	};
 };
 
 &usdhc3 {
diff --git a/arch/arm/boot/dts/imx6qdl-wandboard-revb1.dtsi b/arch/arm/boot/dts/imx6qdl-wandboard-revb1.dtsi
index ef7fa62..b5d0f58 100644
--- a/arch/arm/boot/dts/imx6qdl-wandboard-revb1.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-wandboard-revb1.dtsi
@@ -11,6 +11,24 @@
 
 #include "imx6qdl-wandboard.dtsi"
 
+/ {
+	rfkill {
+		compatible = "wand,imx6qdl-wandboard-rfkill";
+		pinctrl-names = "default";
+		pinctrl-0 = <>;
+
+		bluetooth-on = <&gpio3 13 0>;
+		bluetooth-wake = <&gpio3 14 0>;
+		bluetooth-host-wake = <&gpio3 15 0>;
+
+		wifi-ref-on = <&gpio2 29 0>;
+		wifi-rst-n = <&gpio5 2 0>;
+		wifi-reg-on = <&gpio1 26 0>;
+		wifi-host-wake = <&gpio1 29 0>;
+		wifi-wake = <&gpio1 30 0>;
+	};
+};
+
 &iomuxc {
 	pinctrl-0 = <&pinctrl_hog>;
 
@@ -37,6 +55,5 @@
 &usdhc2 {
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_usdhc2>;
-	non-removable;
 	status = "okay";
 };
diff --git a/arch/arm/boot/dts/imx6qdl-wandboard-revc1.dtsi b/arch/arm/boot/dts/imx6qdl-wandboard-revc1.dtsi
index 8d893a7..b2097ef 100644
--- a/arch/arm/boot/dts/imx6qdl-wandboard-revc1.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-wandboard-revc1.dtsi
@@ -11,6 +11,24 @@
 
 #include "imx6qdl-wandboard.dtsi"
 
+/ {
+	rfkill {
+		compatible = "wand,imx6qdl-wandboard-rfkill";
+		pinctrl-names = "default";
+		pinctrl-0 = <>;
+
+		bluetooth-on = <&gpio5 21 0>;
+		bluetooth-wake = <&gpio5 30 0>;
+		bluetooth-host-wake = <&gpio5 20 0>;
+
+		wifi-ref-on = <&gpio5 31 0>; /* Wifi Power Enable */
+		wifi-rst-n = <&gpio6 0 0>; /* WIFI_ON reset */
+		wifi-reg-on = <&gpio1 26 0>; /* WL_REG_ON */
+		wifi-host-wake = <&gpio1 29 0>; /* WL_HOST_WAKE */
+		wifi-wake = <&gpio1 30 0>; /* WL_WAKE */
+	};
+};
+
 &iomuxc {
 	pinctrl-0 = <&pinctrl_hog>;
 
diff --git a/arch/arm/boot/dts/imx6qdl.dtsi b/arch/arm/boot/dts/imx6qdl.dtsi
index be2e747..b946245 100644
--- a/arch/arm/boot/dts/imx6qdl.dtsi
+++ b/arch/arm/boot/dts/imx6qdl.dtsi
@@ -935,6 +935,8 @@
 
 			usbh1: usb@02184200 {
 				compatible = "fsl,imx6q-usb", "fsl,imx27-usb";
+				#address-cells = <1>;
+				#size-cells = <0>;
 				reg = <0x02184200 0x200>;
 				interrupts = <0 40 IRQ_TYPE_LEVEL_HIGH>;
 				clocks = <&clks IMX6QDL_CLK_USBOH3>;
@@ -949,6 +951,8 @@
 
 			usbh2: usb@02184400 {
 				compatible = "fsl,imx6q-usb", "fsl,imx27-usb";
+				#address-cells = <1>;
+				#size-cells = <0>;
 				reg = <0x02184400 0x200>;
 				interrupts = <0 41 IRQ_TYPE_LEVEL_HIGH>;
 				clocks = <&clks IMX6QDL_CLK_USBOH3>;
@@ -962,6 +966,8 @@
 
 			usbh3: usb@02184600 {
 				compatible = "fsl,imx6q-usb", "fsl,imx27-usb";
+				#address-cells = <1>;
+				#size-cells = <0>;
 				reg = <0x02184600 0x200>;
 				interrupts = <0 42 IRQ_TYPE_LEVEL_HIGH>;
 				clocks = <&clks IMX6QDL_CLK_USBOH3>;
diff --git b/arch/arm/boot/dts/imx6ul-14x14-evk-ism43362-b81-evb.dts b/arch/arm/boot/dts/imx6ul-14x14-evk-ism43362-b81-evb.dts
new file mode 100644
index 0000000..9ba86b8
--- /dev/null
+++ b/arch/arm/boot/dts/imx6ul-14x14-evk-ism43362-b81-evb.dts
@@ -0,0 +1,516 @@
+/*
+ * Copyright (C) 2015 Freescale Semiconductor, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/dts-v1/;
+
+#include "imx6ul.dtsi"
+
+/ {
+	model = "Freescale i.MX6 UltraLite 14x14 EVK Board";
+	compatible = "fsl,imx6ul-14x14-evk", "fsl,imx6ul";
+
+	chosen {
+		stdout-path = &uart1;
+	};
+
+	memory {
+		reg = <0x80000000 0x20000000>;
+	};
+
+	backlight {
+		compatible = "pwm-backlight";
+		pwms = <&pwm1 0 5000000>;
+		brightness-levels = <0 4 8 16 32 64 128 255>;
+		default-brightness-level = <6>;
+		status = "okay";
+	};
+
+	regulators {
+		compatible = "simple-bus";
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		reg_sd1_vmmc: sd1_regulator {
+			compatible = "regulator-fixed";
+			regulator-name = "VSD_3V3";
+			regulator-min-microvolt = <3300000>;
+			regulator-max-microvolt = <3300000>;
+			gpio = <&gpio1 9 GPIO_ACTIVE_HIGH>;
+			enable-active-high;
+		};
+
+		wlreg_on: fixedregulator@100 {
+			compatible = "regulator-fixed";
+			regulator-min-microvolt = <5000000>;
+			regulator-max-microvolt = <5000000>;
+			regulator-name = "wlreg_on";
+			gpio = <&gpio5 1 GPIO_ACTIVE_HIGH>;
+			startup-delay-us = <100>;
+			enable-active-high;
+		};
+	};
+
+	sound {
+		compatible = "simple-audio-card";
+		simple-audio-card,name = "mx6ul-wm8960";
+		simple-audio-card,format = "i2s";
+		simple-audio-card,bitclock-master = <&dailink_master>;
+		simple-audio-card,frame-master = <&dailink_master>;
+		simple-audio-card,widgets =
+			"Microphone", "Mic Jack",
+			"Line", "Line In",
+			"Line", "Line Out",
+			"Speaker", "Speaker",
+			"Headphone", "Headphone Jack";
+		simple-audio-card,routing =
+			"Headphone Jack", "HP_L",
+			"Headphone Jack", "HP_R",
+			"Speaker", "SPK_LP",
+			"Speaker", "SPK_LN",
+			"Speaker", "SPK_RP",
+			"Speaker", "SPK_RN",
+			"LINPUT1", "Mic Jack",
+			"LINPUT3", "Mic Jack",
+			"RINPUT1", "Mic Jack",
+			"RINPUT2", "Mic Jack";
+
+		simple-audio-card,cpu {
+			sound-dai = <&sai2>;
+		};
+
+		dailink_master: simple-audio-card,codec {
+			sound-dai = <&codec>;
+			clocks = <&clks IMX6UL_CLK_SAI2>;
+		};
+	};
+};
+
+&clks {
+	assigned-clocks = <&clks IMX6UL_CLK_PLL4_AUDIO_DIV>;
+	assigned-clock-rates = <786432000>;
+};
+
+&cpu0 {
+	arm-supply = <&reg_arm>;
+	soc-supply = <&reg_soc>;
+};
+
+&i2c2 {
+	clock_frequency = <100000>;
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_i2c2>;
+	status = "okay";
+
+	codec: wm8960@1a {
+		#sound-dai-cells = <0>;
+		compatible = "wlf,wm8960";
+		reg = <0x1a>;
+		wlf,shared-lrclk;
+	};
+};
+
+&fec1 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_enet1>;
+	phy-mode = "rmii";
+	phy-handle = <&ethphy0>;
+	status = "okay";
+};
+
+&fec2 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_enet2>;
+	phy-mode = "rmii";
+	phy-handle = <&ethphy1>;
+	status = "okay";
+
+	mdio {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		ethphy0: ethernet-phy@2 {
+			reg = <2>;
+		};
+
+		ethphy1: ethernet-phy@1 {
+			reg = <1>;
+		};
+	};
+};
+
+
+&lcdif {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_lcdif_dat
+		     &pinctrl_lcdif_ctrl>;
+	display = <&display0>;
+	status = "okay";
+
+	display0: display {
+		bits-per-pixel = <16>;
+		bus-width = <24>;
+
+		display-timings {
+			native-mode = <&timing0>;
+
+			timing0: timing0 {
+				clock-frequency = <9200000>;
+				hactive = <480>;
+				vactive = <272>;
+				hfront-porch = <8>;
+				hback-porch = <4>;
+				hsync-len = <41>;
+				vback-porch = <2>;
+				vfront-porch = <4>;
+				vsync-len = <10>;
+				hsync-active = <0>;
+				vsync-active = <0>;
+				de-active = <1>;
+				pixelclk-active = <0>;
+			};
+		};
+	};
+};
+
+&pwm1 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_pwm1>;
+	status = "okay";
+};
+
+&qspi {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_qspi>;
+	status = "okay";
+
+	flash0: n25q256a@0 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		compatible = "micron,n25q256a";
+		spi-max-frequency = <29000000>;
+		reg = <0>;
+	};
+};
+
+&sai2 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_sai2>;
+	assigned-clocks = <&clks IMX6UL_CLK_SAI2_SEL>,
+			  <&clks IMX6UL_CLK_SAI2>;
+	assigned-clock-parents = <&clks IMX6UL_CLK_PLL4_AUDIO_DIV>;
+	assigned-clock-rates = <0>, <12288000>;
+	fsl,sai-mclk-direction-output;
+	status = "okay";
+};
+
+&snvs_poweroff {
+	status = "okay";
+};
+
+&tsc {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_tsc>;
+	xnur-gpio = <&gpio1 3 GPIO_ACTIVE_LOW>;
+	measure-delay-time = <0xffff>;
+	pre-charge-time = <0xfff>;
+	status = "okay";
+};
+
+&uart1 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_uart1>;
+	status = "okay";
+};
+
+&uart2 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_uart2>;
+	uart-has-rtscts;
+	status = "okay";
+};
+
+&usbotg1 {
+	dr_mode = "peripheral";
+	status = "okay";
+};
+
+&usbotg2 {
+	dr_mode = "host";
+	disable-over-current;
+	status = "okay";
+};
+
+&reg_sd1_vmmc {
+	regulator-always-on;
+};
+
+&usdhc1 {
+	pinctrl-names = "default", "state_100mhz", "state_200mhz";
+	pinctrl-0 = <&pinctrl_usdhc1>;
+	pinctrl-1 = <&pinctrl_usdhc1_100mhz>;
+	pinctrl-2 = <&pinctrl_usdhc1_200mhz>;
+	cd-gpios = <&gpio1 19 GPIO_ACTIVE_LOW>;
+	no-1-8-v;
+	keep-power-in-suspend;
+	wakeup-source;
+	vmmc-supply = <&reg_sd1_vmmc>;
+	status = "okay";
+};
+
+&usdhc2 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_usdhc2>;
+	no-1-8-v;
+	keep-power-in-suspend;
+	wakeup-source;
+	status = "okay";
+};
+
+&wdog1 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_wdog>;
+	fsl,ext-reset-output;
+};
+
+&iomuxc {
+	pinctrl-names = "default";
+
+	pinctrl_csi1: csi1grp {
+		fsl,pins = <
+			MX6UL_PAD_CSI_MCLK__CSI_MCLK		0x1b088
+			MX6UL_PAD_CSI_PIXCLK__CSI_PIXCLK	0x1b088
+			MX6UL_PAD_CSI_VSYNC__CSI_VSYNC		0x1b088
+			MX6UL_PAD_CSI_HSYNC__CSI_HSYNC		0x1b088
+			MX6UL_PAD_CSI_DATA00__CSI_DATA02	0x1b088
+			MX6UL_PAD_CSI_DATA01__CSI_DATA03	0x1b088
+			MX6UL_PAD_CSI_DATA02__CSI_DATA04	0x1b088
+			MX6UL_PAD_CSI_DATA03__CSI_DATA05	0x1b088
+			MX6UL_PAD_CSI_DATA04__CSI_DATA06	0x1b088
+			MX6UL_PAD_CSI_DATA05__CSI_DATA07	0x1b088
+			MX6UL_PAD_CSI_DATA06__CSI_DATA08	0x1b088
+			MX6UL_PAD_CSI_DATA07__CSI_DATA09	0x1b088
+		>;
+	};
+
+	pinctrl_enet1: enet1grp {
+		fsl,pins = <
+			MX6UL_PAD_ENET1_RX_EN__ENET1_RX_EN	0x1b0b0
+			MX6UL_PAD_ENET1_RX_ER__ENET1_RX_ER	0x1b0b0
+			MX6UL_PAD_ENET1_RX_DATA0__ENET1_RDATA00	0x1b0b0
+			MX6UL_PAD_ENET1_RX_DATA1__ENET1_RDATA01	0x1b0b0
+			MX6UL_PAD_ENET1_TX_EN__ENET1_TX_EN	0x1b0b0
+			MX6UL_PAD_ENET1_TX_DATA0__ENET1_TDATA00	0x1b0b0
+			MX6UL_PAD_ENET1_TX_DATA1__ENET1_TDATA01	0x1b0b0
+			MX6UL_PAD_ENET1_TX_CLK__ENET1_REF_CLK1	0x4001b031
+		>;
+	};
+
+	pinctrl_enet2: enet2grp {
+		fsl,pins = <
+			MX6UL_PAD_GPIO1_IO07__ENET2_MDC		0x1b0b0
+			MX6UL_PAD_GPIO1_IO06__ENET2_MDIO	0x1b0b0
+			MX6UL_PAD_ENET2_RX_EN__ENET2_RX_EN	0x1b0b0
+			MX6UL_PAD_ENET2_RX_ER__ENET2_RX_ER	0x1b0b0
+			MX6UL_PAD_ENET2_RX_DATA0__ENET2_RDATA00	0x1b0b0
+			MX6UL_PAD_ENET2_RX_DATA1__ENET2_RDATA01	0x1b0b0
+			MX6UL_PAD_ENET2_TX_EN__ENET2_TX_EN	0x1b0b0
+			MX6UL_PAD_ENET2_TX_DATA0__ENET2_TDATA00	0x1b0b0
+			MX6UL_PAD_ENET2_TX_DATA1__ENET2_TDATA01	0x1b0b0
+			MX6UL_PAD_ENET2_TX_CLK__ENET2_REF_CLK2	0x4001b031
+			MX6UL_PAD_SNVS_TAMPER0__GPIO5_IO00	0x17059
+		>;
+	};
+
+	pinctrl_flexcan1: flexcan1grp{
+		fsl,pins = <
+			MX6UL_PAD_UART3_RTS_B__FLEXCAN1_RX	0x1b020
+			MX6UL_PAD_UART3_CTS_B__FLEXCAN1_TX	0x1b020
+		>;
+	};
+
+	pinctrl_flexcan2: flexcan2grp{
+		fsl,pins = <
+			MX6UL_PAD_UART2_RTS_B__FLEXCAN2_RX	0x1b020
+			MX6UL_PAD_UART2_CTS_B__FLEXCAN2_TX	0x1b020
+		>;
+	};
+
+	pinctrl_i2c1: i2c1grp {
+		fsl,pins = <
+			MX6UL_PAD_UART4_TX_DATA__I2C1_SCL 0x4001b8b0
+			MX6UL_PAD_UART4_RX_DATA__I2C1_SDA 0x4001b8b0
+		>;
+	};
+
+	pinctrl_i2c2: i2c2grp {
+		fsl,pins = <
+			MX6UL_PAD_UART5_TX_DATA__I2C2_SCL 0x4001b8b0
+			MX6UL_PAD_UART5_RX_DATA__I2C2_SDA 0x4001b8b0
+		>;
+	};
+
+	pinctrl_lcdif_dat: lcdifdatgrp {
+		fsl,pins = <
+			MX6UL_PAD_LCD_DATA00__LCDIF_DATA00  0x79
+			MX6UL_PAD_LCD_DATA01__LCDIF_DATA01  0x79
+			MX6UL_PAD_LCD_DATA02__LCDIF_DATA02  0x79
+			MX6UL_PAD_LCD_DATA03__LCDIF_DATA03  0x79
+			MX6UL_PAD_LCD_DATA04__LCDIF_DATA04  0x79
+			MX6UL_PAD_LCD_DATA05__LCDIF_DATA05  0x79
+			MX6UL_PAD_LCD_DATA06__LCDIF_DATA06  0x79
+			MX6UL_PAD_LCD_DATA07__LCDIF_DATA07  0x79
+			MX6UL_PAD_LCD_DATA08__LCDIF_DATA08  0x79
+			MX6UL_PAD_LCD_DATA09__LCDIF_DATA09  0x79
+			MX6UL_PAD_LCD_DATA10__LCDIF_DATA10  0x79
+			MX6UL_PAD_LCD_DATA11__LCDIF_DATA11  0x79
+			MX6UL_PAD_LCD_DATA12__LCDIF_DATA12  0x79
+			MX6UL_PAD_LCD_DATA13__LCDIF_DATA13  0x79
+			MX6UL_PAD_LCD_DATA14__LCDIF_DATA14  0x79
+			MX6UL_PAD_LCD_DATA15__LCDIF_DATA15  0x79
+			MX6UL_PAD_LCD_DATA16__LCDIF_DATA16  0x79
+			MX6UL_PAD_LCD_DATA17__LCDIF_DATA17  0x79
+			MX6UL_PAD_LCD_DATA18__LCDIF_DATA18  0x79
+			MX6UL_PAD_LCD_DATA19__LCDIF_DATA19  0x79
+			MX6UL_PAD_LCD_DATA20__LCDIF_DATA20  0x79
+			MX6UL_PAD_LCD_DATA21__LCDIF_DATA21  0x79
+			MX6UL_PAD_LCD_DATA22__LCDIF_DATA22  0x79
+			MX6UL_PAD_LCD_DATA23__LCDIF_DATA23  0x79
+		>;
+	};
+
+	pinctrl_lcdif_ctrl: lcdifctrlgrp {
+		fsl,pins = <
+			MX6UL_PAD_LCD_CLK__LCDIF_CLK	    0x79
+			MX6UL_PAD_LCD_ENABLE__LCDIF_ENABLE  0x79
+			MX6UL_PAD_LCD_HSYNC__LCDIF_HSYNC    0x79
+			MX6UL_PAD_LCD_VSYNC__LCDIF_VSYNC    0x79
+			/* used for lcd reset */
+			MX6UL_PAD_SNVS_TAMPER9__GPIO5_IO09  0x79
+		>;
+	};
+
+	pinctrl_qspi: qspigrp {
+		fsl,pins = <
+			MX6UL_PAD_NAND_WP_B__QSPI_A_SCLK	0x70a1
+			MX6UL_PAD_NAND_READY_B__QSPI_A_DATA00	0x70a1
+			MX6UL_PAD_NAND_CE0_B__QSPI_A_DATA01	0x70a1
+			MX6UL_PAD_NAND_CE1_B__QSPI_A_DATA02	0x70a1
+			MX6UL_PAD_NAND_CLE__QSPI_A_DATA03	0x70a1
+			MX6UL_PAD_NAND_DQS__QSPI_A_SS0_B	0x70a1
+		>;
+	};
+
+	pinctrl_sai2: sai2grp {
+		fsl,pins = <
+			MX6UL_PAD_JTAG_TDI__SAI2_TX_BCLK	0x17088
+			MX6UL_PAD_JTAG_TDO__SAI2_TX_SYNC	0x17088
+			MX6UL_PAD_JTAG_TRST_B__SAI2_TX_DATA	0x11088
+			MX6UL_PAD_JTAG_TCK__SAI2_RX_DATA	0x11088
+			MX6UL_PAD_JTAG_TMS__SAI2_MCLK		0x17088
+			MX6UL_PAD_SNVS_TAMPER4__GPIO5_IO04	0x17059
+		>;
+	};
+
+	pinctrl_pwm1: pwm1grp {
+		fsl,pins = <
+			MX6UL_PAD_GPIO1_IO08__PWM1_OUT   0x110b0
+		>;
+	};
+
+	pinctrl_sim2: sim2grp {
+		fsl,pins = <
+			MX6UL_PAD_CSI_DATA03__SIM2_PORT1_PD		0xb808
+			MX6UL_PAD_CSI_DATA04__SIM2_PORT1_CLK		0x31
+			MX6UL_PAD_CSI_DATA05__SIM2_PORT1_RST_B		0xb808
+			MX6UL_PAD_CSI_DATA06__SIM2_PORT1_SVEN		0xb808
+			MX6UL_PAD_CSI_DATA07__SIM2_PORT1_TRXD		0xb809
+			MX6UL_PAD_CSI_DATA02__GPIO4_IO23		0x3008
+		>;
+	};
+
+	pinctrl_tsc: tscgrp {
+		fsl,pins = <
+			MX6UL_PAD_GPIO1_IO01__GPIO1_IO01		0xb0
+			MX6UL_PAD_GPIO1_IO02__GPIO1_IO02		0xb0
+			MX6UL_PAD_GPIO1_IO03__GPIO1_IO03		0xb0
+			MX6UL_PAD_GPIO1_IO04__GPIO1_IO04		0xb0
+		>;
+	};
+
+	pinctrl_uart1: uart1grp {
+		fsl,pins = <
+			MX6UL_PAD_UART1_TX_DATA__UART1_DCE_TX 0x1b0b1
+			MX6UL_PAD_UART1_RX_DATA__UART1_DCE_RX 0x1b0b1
+		>;
+	};
+
+	pinctrl_uart2: uart2grp {
+		fsl,pins = <
+			MX6UL_PAD_UART2_TX_DATA__UART2_DCE_TX	0x1b0b1
+			MX6UL_PAD_UART2_RX_DATA__UART2_DCE_RX	0x1b0b1
+			MX6UL_PAD_UART3_RX_DATA__UART2_DCE_RTS	0x1b0b1
+			MX6UL_PAD_UART3_TX_DATA__UART2_DCE_CTS	0x1b0b1
+		>;
+	};
+
+	pinctrl_usdhc1: usdhc1grp {
+		fsl,pins = <
+			MX6UL_PAD_SD1_CMD__USDHC1_CMD     	0x17059
+			MX6UL_PAD_SD1_CLK__USDHC1_CLK     	0x10071
+			MX6UL_PAD_SD1_DATA0__USDHC1_DATA0 	0x17059
+			MX6UL_PAD_SD1_DATA1__USDHC1_DATA1 	0x17059
+			MX6UL_PAD_SD1_DATA2__USDHC1_DATA2 	0x17059
+			MX6UL_PAD_SD1_DATA3__USDHC1_DATA3 	0x17059
+			MX6UL_PAD_UART1_RTS_B__GPIO1_IO19       0x17059 /* SD1 CD */
+			MX6UL_PAD_GPIO1_IO05__USDHC1_VSELECT    0x17059 /* SD1 VSELECT */
+			MX6UL_PAD_GPIO1_IO09__GPIO1_IO09        0x17059 /* SD1 RESET */
+			MX6UL_PAD_SNVS_TAMPER1__GPIO5_IO01 0x3029
+		>;
+	};
+
+	pinctrl_usdhc1_100mhz: usdhc1grp100mhz {
+		fsl,pins = <
+			MX6UL_PAD_SD1_CMD__USDHC1_CMD     0x170b9
+			MX6UL_PAD_SD1_CLK__USDHC1_CLK     0x100b9
+			MX6UL_PAD_SD1_DATA0__USDHC1_DATA0 0x170b9
+			MX6UL_PAD_SD1_DATA1__USDHC1_DATA1 0x170b9
+			MX6UL_PAD_SD1_DATA2__USDHC1_DATA2 0x170b9
+			MX6UL_PAD_SD1_DATA3__USDHC1_DATA3 0x170b9
+			MX6UL_PAD_SNVS_TAMPER1__GPIO5_IO01 0x3029
+		>;
+	};
+
+	pinctrl_usdhc1_200mhz: usdhc1grp200mhz {
+		fsl,pins = <
+			MX6UL_PAD_SD1_CMD__USDHC1_CMD     0x170f9
+			MX6UL_PAD_SD1_CLK__USDHC1_CLK     0x100f9
+			MX6UL_PAD_SD1_DATA0__USDHC1_DATA0 0x170f9
+			MX6UL_PAD_SD1_DATA1__USDHC1_DATA1 0x170f9
+			MX6UL_PAD_SD1_DATA2__USDHC1_DATA2 0x170f9
+			MX6UL_PAD_SD1_DATA3__USDHC1_DATA3 0x170f9
+			MX6UL_PAD_SNVS_TAMPER1__GPIO5_IO01 0x3029
+		>;
+	};
+
+	pinctrl_usdhc2: usdhc2grp {
+		fsl,pins = <
+			MX6UL_PAD_NAND_RE_B__USDHC2_CLK     0x17059
+			MX6UL_PAD_NAND_WE_B__USDHC2_CMD     0x17059
+			MX6UL_PAD_NAND_DATA00__USDHC2_DATA0 0x17059
+			MX6UL_PAD_NAND_DATA01__USDHC2_DATA1 0x17059
+			MX6UL_PAD_NAND_DATA02__USDHC2_DATA2 0x17059
+			MX6UL_PAD_NAND_DATA03__USDHC2_DATA3 0x17059
+		>;
+	};
+
+	pinctrl_wdog: wdoggrp {
+		fsl,pins = <
+			MX6UL_PAD_LCD_RESET__WDOG1_WDOG_ANY    0x30b0
+		>;
+	};
+};
diff --git a/arch/arm/boot/dts/omap3-beagle-xm.dts b/arch/arm/boot/dts/omap3-beagle-xm.dts
index 85e297e..226b833 100644
--- a/arch/arm/boot/dts/omap3-beagle-xm.dts
+++ b/arch/arm/boot/dts/omap3-beagle-xm.dts
@@ -27,6 +27,7 @@
 	aliases {
 		display0 = &dvi0;
 		display1 = &tv0;
+		ethernet = &ethernet;
 	};
 
 	leds {
@@ -146,6 +147,7 @@
 	};
 
 	etb@5401b000 {
+		status = "disabled";
 		compatible = "arm,coresight-etb10", "arm,primecell";
 		reg = <0x5401b000 0x1000>;
 
@@ -160,6 +162,7 @@
 	};
 
 	etm@54010000 {
+		status = "disabled";
 		compatible = "arm,coresight-etm3x", "arm,primecell";
 		reg = <0x54010000 0x1000>;
 
@@ -205,6 +208,25 @@
 		>;
 	};
 
+	spi3_pins: pinmux_spi3_pins {
+		pinctrl-single,pins = <
+			0x128 (PIN_INPUT | MUX_MODE1) /* sdmmc2_clk.mcspi3_clk gpio_130 */
+			0x12a (PIN_OUTPUT | MUX_MODE1) /* sdmmc2_cmd.mcspi3_simo gpio_131 */
+			0x12c (PIN_INPUT_PULLUP | MUX_MODE1) /* sdmmc2_dat0.mcspi3_somi gpio_132 */
+			0x130 (PIN_OUTPUT | MUX_MODE1) /* sdmmc2_dat2.mcspi3_cs1 gpio_134 */
+			0x132 (PIN_OUTPUT | MUX_MODE1) /* sdmmc2_dat3.mcspi3_cs0 gpio_135 */
+		>;
+	};
+
+	spi4_pins: pinmux_spi4_pins {
+		pinctrl-single,pins = <
+			0x15c (PIN_INPUT | MUX_MODE1) /* mcbsp1_clkr.mcspi4_clk gpio_156 */
+			0x160 (PIN_OUTPUT | MUX_MODE1) /* mcbsp1_dx.mcspi4_simo gpio_158 */
+			0x162 (PIN_INPUT_PULLUP | MUX_MODE1) /* mcbsp1_dr.mcspi4_somi gpio_159 */
+			0x166 (PIN_OUTPUT | MUX_MODE1) /* mcbsp1_fsx.mcspi4_cs0 gpio_161 */
+		>;
+	};
+
 	hsusb2_pins: pinmux_hsusb2_pins {
 		pinctrl-single,pins = <
 			OMAP3_CORE1_IOPAD(0x21d4, PIN_INPUT_PULLDOWN | MUX_MODE3)	/* mcspi1_cs3.hsusb2_data2 */
@@ -279,7 +301,7 @@
 		};
 
 		twl_power: power {
-			compatible = "ti,twl4030-power-beagleboard-xm", "ti,twl4030-power-idle-osc-off";
+			compatible = "ti,twl4030-power-reset";
 			ti,use_poweroff;
 		};
 	};
@@ -310,6 +332,36 @@
 	status = "disabled";
 };
 
+&mcspi3 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&spi3_pins>;
+	status = "okay";
+
+	spidev0: spi@0 {
+		compatible = "spidev";
+		reg = <0>;
+		spi-max-frequency = <48000000>;
+	};
+
+	spidev1: spi@1 {
+		compatible = "spidev";
+		reg = <1>;
+		spi-max-frequency = <48000000>;
+	};
+};
+
+&mcspi4 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&spi4_pins>;
+	status = "okay";
+
+	spidev2: spi@0 {
+		compatible = "spidev";
+		reg = <0>;
+		spi-max-frequency = <48000000>;
+	};
+};
+
 &twl_gpio {
 	ti,use-leds;
 	/* pullups: BIT(1) */
@@ -348,6 +400,21 @@
 
 &usbhsehci {
 	phys = <0 &hsusb2_phy>;
+
+	#address-cells = <1>;
+	#size-cells = <0>;
+
+	hub@2 {
+		compatible = "usb424,9514";
+		reg = <2>;
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		ethernet: usbether@1 {
+			compatible = "usb424,ec00";
+			reg = <1>;
+		};
+	};
 };
 
 &vaux2 {
diff --git a/arch/arm/boot/dts/omap3-beagle.dts b/arch/arm/boot/dts/omap3-beagle.dts
index 4be85ce..049b5e1 100644
--- a/arch/arm/boot/dts/omap3-beagle.dts
+++ b/arch/arm/boot/dts/omap3-beagle.dts
@@ -141,6 +141,7 @@
 	};
 
 	etb@540000000 {
+		status = "disabled";
 		compatible = "arm,coresight-etb10", "arm,primecell";
 		reg = <0x5401b000 0x1000>;
 
@@ -155,6 +156,7 @@
 	};
 
 	etm@54010000 {
+		status = "disabled";
 		compatible = "arm,coresight-etm3x", "arm,primecell";
 		reg = <0x54010000 0x1000>;
 
@@ -271,9 +273,18 @@
 			codec {
 			};
 		};
+
+		twl_power: power {
+			compatible = "ti,twl4030-power-reset";
+			ti,use_poweroff;
+		};
 	};
 };
 
+&i2c2 {
+	clock-frequency = <400000>;
+};
+
 #include "twl4030.dtsi"
 #include "twl4030_omap3.dtsi"
 
diff --git a/arch/arm/boot/dts/omap4-panda-a4.dts b/arch/arm/boot/dts/omap4-panda-a4.dts
index 78d3631..67ee5b4 100644
--- a/arch/arm/boot/dts/omap4-panda-a4.dts
+++ b/arch/arm/boot/dts/omap4-panda-a4.dts
@@ -10,6 +10,18 @@
 #include "omap443x.dtsi"
 #include "omap4-panda-common.dtsi"
 
+&emif1 {
+	cs1-used;
+	device-handle = <&elpida_ECB240ABACN>;
+	status = "ok";
+};
+
+&emif2 {
+	cs1-used;
+	device-handle = <&elpida_ECB240ABACN>;
+	status = "ok";
+};
+
 /* Pandaboard Rev A4+ have external pullups on SCL & SDA */
 &dss_hdmi_pins {
 	pinctrl-single,pins = <
diff --git a/arch/arm/boot/dts/omap4-panda-common.dtsi b/arch/arm/boot/dts/omap4-panda-common.dtsi
index 1673689..c6b90f0 100644
--- a/arch/arm/boot/dts/omap4-panda-common.dtsi
+++ b/arch/arm/boot/dts/omap4-panda-common.dtsi
@@ -463,16 +463,6 @@
 	};
 };
 
-&emif1 {
-	cs1-used;
-	device-handle = <&elpida_ECB240ABACN>;
-};
-
-&emif2 {
-	cs1-used;
-	device-handle = <&elpida_ECB240ABACN>;
-};
-
 &mcbsp1 {
 	pinctrl-names = "default";
 	pinctrl-0 = <&mcbsp1_pins>;
diff --git b/arch/arm/boot/dts/omap4-panda-es-b3.dts b/arch/arm/boot/dts/omap4-panda-es-b3.dts
new file mode 100644
index 0000000..2f1dabc
--- /dev/null
+++ b/arch/arm/boot/dts/omap4-panda-es-b3.dts
@@ -0,0 +1,73 @@
+/*
+ * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+/dts-v1/;
+
+#include "omap4460.dtsi"
+#include "omap4-panda-common.dtsi"
+
+/ {
+	model = "TI OMAP4 PandaBoard-ES";
+	compatible = "ti,omap4-panda-es", "ti,omap4-panda", "ti,omap4460", "ti,omap4430", "ti,omap4";
+};
+
+/* Audio routing is differnet between PandaBoard4430 and PandaBoardES */
+&sound {
+	ti,model = "PandaBoardES";
+
+	/* Audio routing */
+	ti,audio-routing =
+		"Headset Stereophone", "HSOL",
+		"Headset Stereophone", "HSOR",
+		"Ext Spk", "HFL",
+		"Ext Spk", "HFR",
+		"Line Out", "AUXL",
+		"Line Out", "AUXR",
+		"AFML", "Line In",
+		"AFMR", "Line In";
+};
+
+/* PandaboardES has external pullups on SCL & SDA */
+&dss_hdmi_pins {
+	pinctrl-single,pins = <
+		0x5a (PIN_INPUT_PULLUP | MUX_MODE0)	/* hdmi_cec.hdmi_cec */
+		0x5c (PIN_INPUT | MUX_MODE0)		/* hdmi_scl.hdmi_scl */
+		0x5e (PIN_INPUT | MUX_MODE0)		/* hdmi_sda.hdmi_sda */
+		>;
+};
+
+&omap4_pmx_core {
+	led_gpio_pins: gpio_led_pmx {
+		pinctrl-single,pins = <
+			0xb6 (PIN_OUTPUT | MUX_MODE3)	/* gpio_110 */
+		>;
+	};
+};
+
+&led_wkgpio_pins {
+	pinctrl-single,pins = <
+		0x1c (PIN_OUTPUT | MUX_MODE3)	/* gpio_wk8 */
+	>;
+};
+
+&leds {
+	pinctrl-0 = <
+		&led_gpio_pins
+		&led_wkgpio_pins
+	>;
+
+	heartbeat {
+		gpios = <&gpio4 14 GPIO_ACTIVE_HIGH>;
+	};
+	mmc {
+		gpios = <&gpio1 8 GPIO_ACTIVE_HIGH>;
+	};
+};
+
+&gpio1 {
+	 ti,no-reset-on-init;
+};
diff --git a/arch/arm/boot/dts/omap4-panda-es.dts b/arch/arm/boot/dts/omap4-panda-es.dts
index 119f8e6..5c54ccf 100644
--- a/arch/arm/boot/dts/omap4-panda-es.dts
+++ b/arch/arm/boot/dts/omap4-panda-es.dts
@@ -15,6 +15,18 @@
 	compatible = "ti,omap4-panda-es", "ti,omap4-panda", "ti,omap4460", "ti,omap4430", "ti,omap4";
 };
 
+&emif1 {
+	cs1-used;
+	device-handle = <&elpida_ECB240ABACN>;
+	status = "ok";
+};
+
+&emif2 {
+	cs1-used;
+	device-handle = <&elpida_ECB240ABACN>;
+	status = "ok";
+};
+
 /* Audio routing is differnet between PandaBoard4430 and PandaBoardES */
 &sound {
 	ti,model = "PandaBoardES";
diff --git a/arch/arm/boot/dts/omap4-panda.dts b/arch/arm/boot/dts/omap4-panda.dts
index a0e28b2..3ee41ef 100644
--- a/arch/arm/boot/dts/omap4-panda.dts
+++ b/arch/arm/boot/dts/omap4-panda.dts
@@ -14,3 +14,15 @@
 	model = "TI OMAP4 PandaBoard";
 	compatible = "ti,omap4-panda", "ti,omap4430", "ti,omap4";
 };
+
+&emif1 {
+	cs1-used;
+	device-handle = <&elpida_ECB240ABACN>;
+	status = "ok";
+};
+
+&emif2 {
+	cs1-used;
+	device-handle = <&elpida_ECB240ABACN>;
+	status = "ok";
+};
diff --git a/arch/arm/boot/dts/omap4-sdp.dts b/arch/arm/boot/dts/omap4-sdp.dts
index d728ec9..fb2cda4 100644
--- a/arch/arm/boot/dts/omap4-sdp.dts
+++ b/arch/arm/boot/dts/omap4-sdp.dts
@@ -502,11 +502,13 @@
 &emif1 {
 	cs1-used;
 	device-handle = <&elpida_ECB240ABACN>;
+	status = "ok";
 };
 
 &emif2 {
 	cs1-used;
 	device-handle = <&elpida_ECB240ABACN>;
+	status = "ok";
 };
 
 &keypad {
diff --git a/arch/arm/boot/dts/omap4.dtsi b/arch/arm/boot/dts/omap4.dtsi
index 9c289dd..ea0b76b 100644
--- a/arch/arm/boot/dts/omap4.dtsi
+++ b/arch/arm/boot/dts/omap4.dtsi
@@ -696,6 +696,7 @@
 			hw-caps-read-idle-ctrl;
 			hw-caps-ll-interface;
 			hw-caps-temp-alert;
+			status = "disabled";
 		};
 
 		emif2: emif@4d000000 {
@@ -708,6 +709,7 @@
 			hw-caps-read-idle-ctrl;
 			hw-caps-ll-interface;
 			hw-caps-temp-alert;
+			status = "disabled";
 		};
 
 		ocp2scp@4a0ad000 {
diff --git a/arch/arm/boot/dts/sun8i-h3-orangepi-one.dts b/arch/arm/boot/dts/sun8i-h3-orangepi-one.dts
index 5c9b5bf..7c9335d 100644
--- a/arch/arm/boot/dts/sun8i-h3-orangepi-one.dts
+++ b/arch/arm/boot/dts/sun8i-h3-orangepi-one.dts
@@ -94,6 +94,17 @@
 	status = "okay";
 };
 
+&emac {
+	phy = <&phy1>;
+	phy-mode = "mii";
+	allwinner,use-internal-phy;
+	allwinner,leds-active-low;
+	status = "okay";
+	phy1: ethernet-phy@1 {
+		reg = <1>;
+	};
+};
+
 &mmc0 {
 	pinctrl-names = "default";
 	pinctrl-0 = <&mmc0_pins_a>, <&mmc0_cd_pin>;
diff --git a/arch/arm/boot/dts/sun8i-h3-orangepi-pc.dts b/arch/arm/boot/dts/sun8i-h3-orangepi-pc.dts
index 3ec9712..21acd8a 100644
--- a/arch/arm/boot/dts/sun8i-h3-orangepi-pc.dts
+++ b/arch/arm/boot/dts/sun8i-h3-orangepi-pc.dts
@@ -183,3 +183,14 @@
 	/* USB VBUS is always on */
 	status = "okay";
 };
+
+&emac {
+	phy = <&phy1>;
+	phy-mode = "mii";
+	allwinner,use-internal-phy;
+	allwinner,leds-active-low;
+	status = "okay";
+	phy1: ethernet-phy@1 {
+		reg = <1>;
+	};
+};
diff --git a/arch/arm/boot/dts/sun8i-h3.dtsi b/arch/arm/boot/dts/sun8i-h3.dtsi
index f4ba088..43c4a5c 100644
--- a/arch/arm/boot/dts/sun8i-h3.dtsi
+++ b/arch/arm/boot/dts/sun8i-h3.dtsi
@@ -50,6 +50,10 @@
 / {
 	interrupt-parent = <&gic>;
 
+	aliases {
+		ethernet0 = &emac;
+	};
+
 	cpus {
 		#address-cells = <1>;
 		#size-cells = <0>;
@@ -530,6 +534,20 @@
 			#size-cells = <0>;
 		};
 
+		emac: ethernet@1c30000 {
+			compatible = "allwinner,sun8i-h3-emac";
+			reg = <0x01c30000 0x104>, <0x01c00030 0x4>;
+			reg-names = "emac", "syscon";
+			interrupts = <GIC_SPI 82 IRQ_TYPE_LEVEL_HIGH>;
+			resets = <&ccu RST_BUS_EMAC>, <&ccu RST_BUS_EPHY>;
+			reset-names = "ahb", "ephy";
+			clocks = <&ccu CLK_BUS_EMAC>, <&ccu CLK_BUS_EPHY>;
+			clock-names = "ahb", "ephy";
+			#address-cells = <1>;
+			#size-cells = <0>;
+			status = "disabled";
+		};
+
 		gic: interrupt-controller@01c81000 {
 			compatible = "arm,cortex-a7-gic", "arm,cortex-a15-gic";
 			reg = <0x01c81000 0x1000>,
diff --git a/arch/arm/boot/dts/tps65217.dtsi b/arch/arm/boot/dts/tps65217.dtsi
index a632724..02de56b 100644
--- a/arch/arm/boot/dts/tps65217.dtsi
+++ b/arch/arm/boot/dts/tps65217.dtsi
@@ -13,6 +13,18 @@
 
 &tps {
 	compatible = "ti,tps65217";
+	interrupt-controller;
+	#interrupt-cells = <1>;
+
+	charger {
+		compatible = "ti,tps65217-charger";
+		status = "disabled";
+	};
+
+	pwrbutton {
+		compatible = "ti,tps65217-pwrbutton";
+		status = "disabled";
+	};
 
 	regulators {
 		#address-cells = <1>;
diff --git a/arch/arm/mach-imx/devices/Kconfig b/arch/arm/mach-imx/devices/Kconfig
index 6ffe572..bb7bd88 100644
--- a/arch/arm/mach-imx/devices/Kconfig
+++ b/arch/arm/mach-imx/devices/Kconfig
@@ -68,3 +68,9 @@ config IMX_HAVE_PLATFORM_SDHCI_ESDHC_IMX
 
 config IMX_HAVE_PLATFORM_SPI_IMX
 	bool
+
+config WAND_RFKILL
+	tristate "Wandboard RF Kill support"
+	depends on SOC_IMX6Q
+	default m
+	select RFKILL
diff --git a/arch/arm/mach-imx/devices/Makefile b/arch/arm/mach-imx/devices/Makefile
index aa6cee8..800ce9b 100644
--- a/arch/arm/mach-imx/devices/Makefile
+++ b/arch/arm/mach-imx/devices/Makefile
@@ -25,3 +25,4 @@ obj-$(CONFIG_IMX_HAVE_PLATFORM_MXC_W1) += platform-mxc_w1.o
 obj-$(CONFIG_IMX_HAVE_PLATFORM_SDHCI_ESDHC_IMX) += platform-sdhci-esdhc-imx.o
 obj-$(CONFIG_IMX_HAVE_PLATFORM_SPI_IMX) +=  platform-spi_imx.o
 obj-$(CONFIG_IMX_HAVE_PLATFORM_MX2_EMMA) += platform-mx2-emma.o
+obj-$(CONFIG_WAND_RFKILL) += wand-rfkill.o
diff --git b/arch/arm/mach-imx/devices/wand-rfkill.c b/arch/arm/mach-imx/devices/wand-rfkill.c
new file mode 100644
index 0000000..da7ef9f
--- /dev/null
+++ b/arch/arm/mach-imx/devices/wand-rfkill.c
@@ -0,0 +1,290 @@
+/*
+ * arch/arm/mach-imx/devices/wand-rfkill.c
+ *
+ * Copyright (C) 2013 Vladimir Ermakov <vooon341@gmail.com>
+ *
+ * based on net/rfkill/rfkill-gpio.c
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/of.h>
+#include <linux/of_gpio.h>
+#include <linux/of_device.h>
+#include <linux/pinctrl/consumer.h>
+#include <linux/platform_device.h>
+#include <linux/rfkill.h>
+#include <linux/delay.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+
+
+struct wand_rfkill_data {
+	struct rfkill *rfkill_dev;
+	int shutdown_gpio;
+	const char *shutdown_name;
+};
+
+static int wand_rfkill_set_block(void *data, bool blocked)
+{
+	struct wand_rfkill_data *rfkill = data;
+
+	pr_debug("wandboard-rfkill: set block %d\n", blocked);
+
+	if (blocked) {
+		if (gpio_is_valid(rfkill->shutdown_gpio))
+			gpio_direction_output(rfkill->shutdown_gpio, 0);
+	} else {
+		if (gpio_is_valid(rfkill->shutdown_gpio))
+			gpio_direction_output(rfkill->shutdown_gpio, 1);
+	}
+
+	return 0;
+}
+
+static const struct rfkill_ops wand_rfkill_ops = {
+	.set_block = wand_rfkill_set_block,
+};
+
+static int wand_rfkill_wifi_probe(struct device *dev,
+		struct device_node *np,
+		struct wand_rfkill_data *rfkill)
+{
+	int ret;
+	int wl_ref_on, wl_rst_n, wl_reg_on, wl_wake, wl_host_wake;
+
+	wl_ref_on = of_get_named_gpio(np, "wifi-ref-on", 0);
+	wl_rst_n = of_get_named_gpio(np, "wifi-rst-n", 0);
+	wl_reg_on = of_get_named_gpio(np, "wifi-reg-on", 0);
+	wl_wake = of_get_named_gpio(np, "wifi-wake", 0);
+	wl_host_wake = of_get_named_gpio(np, "wifi-host-wake", 0);
+
+	if (!gpio_is_valid(wl_rst_n) || !gpio_is_valid(wl_ref_on) ||
+			!gpio_is_valid(wl_reg_on) || !gpio_is_valid(wl_wake) ||
+			!gpio_is_valid(wl_host_wake)) {
+
+		dev_err(dev, "incorrect wifi gpios (%d %d %d %d %d)\n",
+				wl_rst_n, wl_ref_on, wl_reg_on, wl_wake, wl_host_wake);
+		return -EINVAL;
+	}
+
+	dev_info(dev, "initialize wifi chip\n");
+
+	gpio_request(wl_rst_n, "wl_rst_n");
+	gpio_direction_output(wl_rst_n, 0);
+	msleep(11);
+	gpio_set_value(wl_rst_n, 1);
+
+	gpio_request(wl_ref_on, "wl_ref_on");
+	gpio_direction_output(wl_ref_on, 1);
+
+	gpio_request(wl_reg_on, "wl_reg_on");
+	gpio_direction_output(wl_reg_on, 1);
+
+	gpio_request(wl_wake, "wl_wake");
+	gpio_direction_output(wl_wake, 1);
+
+	gpio_request(wl_host_wake, "wl_host_wake");
+	gpio_direction_input(wl_host_wake);
+
+	rfkill->shutdown_name = "wifi_shutdown";
+	rfkill->shutdown_gpio = wl_wake;
+
+	rfkill->rfkill_dev = rfkill_alloc("wifi-rfkill", dev, RFKILL_TYPE_WLAN,
+			&wand_rfkill_ops, rfkill);
+	if (!rfkill->rfkill_dev) {
+		ret = -ENOMEM;
+		goto wifi_fail_free_gpio;
+	}
+
+	ret = rfkill_register(rfkill->rfkill_dev);
+	if (ret < 0)
+		goto wifi_fail_unregister;
+
+	dev_info(dev, "wifi-rfkill registered.\n");
+
+	return 0;
+
+wifi_fail_unregister:
+	rfkill_destroy(rfkill->rfkill_dev);
+wifi_fail_free_gpio:
+	if (gpio_is_valid(wl_rst_n))     gpio_free(wl_rst_n);
+	if (gpio_is_valid(wl_ref_on))    gpio_free(wl_ref_on);
+	if (gpio_is_valid(wl_reg_on))    gpio_free(wl_reg_on);
+	if (gpio_is_valid(wl_wake))      gpio_free(wl_wake);
+	if (gpio_is_valid(wl_host_wake)) gpio_free(wl_host_wake);
+
+	return ret;
+}
+
+static int wand_rfkill_bt_probe(struct device *dev,
+		struct device_node *np,
+		struct wand_rfkill_data *rfkill)
+{
+	int ret;
+	int bt_on, bt_wake, bt_host_wake;
+
+	bt_on = of_get_named_gpio(np, "bluetooth-on", 0);
+	bt_wake = of_get_named_gpio(np, "bluetooth-wake", 0);
+	bt_host_wake = of_get_named_gpio(np, "bluetooth-host-wake", 0);
+
+	if (!gpio_is_valid(bt_on) || !gpio_is_valid(bt_wake) ||
+			!gpio_is_valid(bt_host_wake)) {
+
+		dev_err(dev, "incorrect bt gpios (%d %d %d)\n",
+				bt_on, bt_wake, bt_host_wake);
+		return -EINVAL;
+	}
+
+	dev_info(dev, "initialize bluetooth chip\n");
+
+	gpio_request(bt_on, "bt_on");
+	gpio_direction_output(bt_on, 0);
+	msleep(11);
+	gpio_set_value(bt_on, 1);
+
+	gpio_request(bt_wake, "bt_wake");
+	gpio_direction_output(bt_wake, 1);
+
+	gpio_request(bt_host_wake, "bt_host_wake");
+	gpio_direction_input(bt_host_wake);
+
+	rfkill->shutdown_name = "bluetooth_shutdown";
+	rfkill->shutdown_gpio = bt_wake;
+
+	rfkill->rfkill_dev = rfkill_alloc("bluetooth-rfkill", dev, RFKILL_TYPE_BLUETOOTH,
+			&wand_rfkill_ops, rfkill);
+	if (!rfkill->rfkill_dev) {
+		ret = -ENOMEM;
+		goto bt_fail_free_gpio;
+	}
+
+	ret = rfkill_register(rfkill->rfkill_dev);
+	if (ret < 0)
+		goto bt_fail_unregister;
+
+	dev_info(dev, "bluetooth-rfkill registered.\n");
+
+	return 0;
+
+bt_fail_unregister:
+	rfkill_destroy(rfkill->rfkill_dev);
+bt_fail_free_gpio:
+	if (gpio_is_valid(bt_on))        gpio_free(bt_on);
+	if (gpio_is_valid(bt_wake))      gpio_free(bt_wake);
+	if (gpio_is_valid(bt_host_wake)) gpio_free(bt_host_wake);
+
+	return ret;
+}
+
+static int wand_rfkill_probe(struct platform_device *pdev)
+{
+	struct wand_rfkill_data *rfkill;
+	struct pinctrl *pinctrl;
+	int ret;
+
+	dev_info(&pdev->dev, "Wandboard rfkill initialization\n");
+
+	if (!pdev->dev.of_node) {
+		dev_err(&pdev->dev, "no device tree node\n");
+		return -ENODEV;
+	}
+
+	rfkill = kzalloc(sizeof(*rfkill) * 2, GFP_KERNEL);
+	if (!rfkill)
+		return -ENOMEM;
+
+	pinctrl = devm_pinctrl_get_select_default(&pdev->dev);
+	if (IS_ERR(pinctrl)) {
+		int ret = PTR_ERR(pinctrl);
+		dev_err(&pdev->dev, "failed to get default pinctrl: %d\n", ret);
+		return ret;
+	}
+
+	/* setup WiFi */
+	ret = wand_rfkill_wifi_probe(&pdev->dev, pdev->dev.of_node, &rfkill[0]);
+	if (ret < 0)
+		goto fail_free_rfkill;
+
+	/* setup bluetooth */
+	ret = wand_rfkill_bt_probe(&pdev->dev, pdev->dev.of_node, &rfkill[1]);
+	if (ret < 0)
+		goto fail_unregister_wifi;
+
+	platform_set_drvdata(pdev, rfkill);
+
+	return 0;
+
+fail_unregister_wifi:
+	if (rfkill[1].rfkill_dev) {
+		rfkill_unregister(rfkill[1].rfkill_dev);
+		rfkill_destroy(rfkill[1].rfkill_dev);
+	}
+
+	/* TODO free gpio */
+
+fail_free_rfkill:
+	kfree(rfkill);
+
+	return ret;
+}
+
+static int wand_rfkill_remove(struct platform_device *pdev)
+{
+	struct wand_rfkill_data *rfkill = platform_get_drvdata(pdev);
+
+	dev_info(&pdev->dev, "Module unloading\n");
+
+	if (!rfkill)
+		return 0;
+
+	/* WiFi */
+	if (gpio_is_valid(rfkill[0].shutdown_gpio))
+		gpio_free(rfkill[0].shutdown_gpio);
+
+	rfkill_unregister(rfkill[0].rfkill_dev);
+	rfkill_destroy(rfkill[0].rfkill_dev);
+
+	/* Bt */
+	if (gpio_is_valid(rfkill[1].shutdown_gpio))
+		gpio_free(rfkill[1].shutdown_gpio);
+
+	rfkill_unregister(rfkill[1].rfkill_dev);
+	rfkill_destroy(rfkill[1].rfkill_dev);
+
+	kfree(rfkill);
+
+	return 0;
+}
+
+static struct of_device_id wand_rfkill_match[] = {
+	{ .compatible = "wand,imx6q-wandboard-rfkill", },
+	{ .compatible = "wand,imx6dl-wandboard-rfkill", },
+	{ .compatible = "wand,imx6qdl-wandboard-rfkill", },
+	{}
+};
+
+static struct platform_driver wand_rfkill_driver = {
+	.driver = {
+		.name = "wandboard-rfkill",
+		.owner = THIS_MODULE,
+		.of_match_table = of_match_ptr(wand_rfkill_match),
+	},
+	.probe = wand_rfkill_probe,
+	.remove = wand_rfkill_remove
+};
+
+module_platform_driver(wand_rfkill_driver);
+
+MODULE_AUTHOR("Vladimir Ermakov <vooon341@gmail.com>");
+MODULE_DESCRIPTION("Wandboard rfkill driver");
+MODULE_LICENSE("GPL v2");
diff --git a/arch/arm/mach-omap2/omap_device.c b/arch/arm/mach-omap2/omap_device.c
index f989145..0679d78 100644
--- a/arch/arm/mach-omap2/omap_device.c
+++ b/arch/arm/mach-omap2/omap_device.c
@@ -138,8 +138,8 @@ static int omap_device_build_from_dt(struct platform_device *pdev)
 	struct omap_device *od;
 	struct omap_hwmod *oh;
 	struct device_node *node = pdev->dev.of_node;
-	const char *oh_name;
-	int oh_cnt, i, ret = 0;
+	const char *oh_name, *rst_name;
+	int oh_cnt, dstr_cnt, i, ret = 0;
 	bool device_active = false;
 
 	oh_cnt = of_property_count_strings(node, "ti,hwmods");
@@ -190,6 +190,26 @@ static int omap_device_build_from_dt(struct platform_device *pdev)
 		omap_device_enable(pdev);
 		pm_runtime_set_active(&pdev->dev);
 	}
+	dstr_cnt =
+		of_property_count_strings(node, "ti,deassert-hard-reset");
+	if (dstr_cnt > 0) {
+		for (i = 0; i < dstr_cnt; i += 2) {
+			of_property_read_string_index(
+				node, "ti,deassert-hard-reset", i,
+				&oh_name);
+			of_property_read_string_index(
+				node, "ti,deassert-hard-reset", i+1,
+				&rst_name);
+			oh = omap_hwmod_lookup(oh_name);
+			if (!oh) {
+				dev_warn(&pdev->dev,
+				"Cannot parse deassert property for '%s'\n",
+				oh_name);
+				break;
+			}
+			omap_hwmod_deassert_hardreset(oh, rst_name);
+		}
+	}
 
 odbfd_exit1:
 	kfree(hwmods);
@@ -206,12 +226,21 @@ static int _omap_device_notifier_call(struct notifier_block *nb,
 {
 	struct platform_device *pdev = to_platform_device(dev);
 	struct omap_device *od;
-	int err;
+	int i, err;
 
 	switch (event) {
 	case BUS_NOTIFY_REMOVED_DEVICE:
-		if (pdev->archdata.od)
-			omap_device_delete(pdev->archdata.od);
+		od = to_omap_device(pdev);
+		if (!od)
+			break;
+
+		for (i = 0; i < od->hwmods_cnt; i++) {
+			/* shutdown hwmods */
+			omap_hwmod_shutdown(od->hwmods[i]);
+			/* we don't remove clocks cause there's no API to do so */
+			/* no harm done, since they will not be created next time */
+		}
+		omap_device_delete(od);
 		break;
 	case BUS_NOTIFY_UNBOUND_DRIVER:
 		od = to_omap_device(pdev);
@@ -793,6 +822,8 @@ int omap_device_idle(struct platform_device *pdev)
 	struct omap_device *od;
 
 	od = to_omap_device(pdev);
+	if (!od)
+		return 0;
 
 	if (od->_state != OMAP_DEVICE_STATE_ENABLED) {
 		dev_warn(&pdev->dev,
diff --git a/drivers/base/power/opp/core.c b/drivers/base/power/opp/core.c
index 6441dfd..f7a5fb4 100644
--- a/drivers/base/power/opp/core.c
+++ b/drivers/base/power/opp/core.c
@@ -93,6 +93,8 @@ struct opp_table *_find_opp_table(struct device *dev)
  * Return: voltage in micro volt corresponding to the opp, else
  * return 0
  *
+ * This is useful only for devices with single power supply.
+ *
  * Locking: This function must be called under rcu_read_lock(). opp is a rcu
  * protected pointer. This means that opp which could have been fetched by
  * opp_find_freq_{exact,ceil,floor} functions is valid as long as we are
@@ -112,7 +114,7 @@ unsigned long dev_pm_opp_get_voltage(struct dev_pm_opp *opp)
 	if (IS_ERR_OR_NULL(tmp_opp))
 		pr_err("%s: Invalid parameters\n", __func__);
 	else
-		v = tmp_opp->u_volt;
+		v = tmp_opp->supplies[0].u_volt;
 
 	return v;
 }
@@ -210,6 +212,24 @@ unsigned long dev_pm_opp_get_max_clock_latency(struct device *dev)
 }
 EXPORT_SYMBOL_GPL(dev_pm_opp_get_max_clock_latency);
 
+static int _get_regulator_count(struct device *dev)
+{
+	struct opp_table *opp_table;
+	int count;
+
+	rcu_read_lock();
+
+	opp_table = _find_opp_table(dev);
+	if (!IS_ERR(opp_table))
+		count = opp_table->regulator_count;
+	else
+		count = 0;
+
+	rcu_read_unlock();
+
+	return count;
+}
+
 /**
  * dev_pm_opp_get_max_volt_latency() - Get max voltage latency in nanoseconds
  * @dev: device for which we do this operation
@@ -222,34 +242,51 @@ unsigned long dev_pm_opp_get_max_volt_latency(struct device *dev)
 {
 	struct opp_table *opp_table;
 	struct dev_pm_opp *opp;
-	struct regulator *reg;
+	struct regulator *reg, **regulators;
 	unsigned long latency_ns = 0;
-	unsigned long min_uV = ~0, max_uV = 0;
-	int ret;
+	int ret, i, count;
+	struct {
+		unsigned long min;
+		unsigned long max;
+	} *uV;
+
+	count = _get_regulator_count(dev);
+
+	/* Regulator may not be required for the device */
+	if (!count)
+		return 0;
+
+	regulators = kmalloc_array(count, sizeof(*regulators), GFP_KERNEL);
+	if (!regulators)
+		return 0;
+
+	uV = kmalloc_array(count, sizeof(*uV), GFP_KERNEL);
+	if (!uV)
+		goto free_regulators;
 
 	rcu_read_lock();
 
 	opp_table = _find_opp_table(dev);
 	if (IS_ERR(opp_table)) {
 		rcu_read_unlock();
-		return 0;
+		goto free_uV;
 	}
 
-	reg = opp_table->regulator;
-	if (IS_ERR(reg)) {
-		/* Regulator may not be required for device */
-		rcu_read_unlock();
-		return 0;
-	}
+	memcpy(regulators, opp_table->regulators, count * sizeof(*regulators));
 
-	list_for_each_entry_rcu(opp, &opp_table->opp_list, node) {
-		if (!opp->available)
-			continue;
+	for (i = 0; i < count; i++) {
+		uV[i].min = ~0;
+		uV[i].max = 0;
 
-		if (opp->u_volt_min < min_uV)
-			min_uV = opp->u_volt_min;
-		if (opp->u_volt_max > max_uV)
-			max_uV = opp->u_volt_max;
+		list_for_each_entry_rcu(opp, &opp_table->opp_list, node) {
+			if (!opp->available)
+				continue;
+
+			if (opp->supplies[i].u_volt_min < uV[i].min)
+				uV[i].min = opp->supplies[i].u_volt_min;
+			if (opp->supplies[i].u_volt_max > uV[i].max)
+				uV[i].max = opp->supplies[i].u_volt_max;
+		}
 	}
 
 	rcu_read_unlock();
@@ -258,9 +295,16 @@ unsigned long dev_pm_opp_get_max_volt_latency(struct device *dev)
 	 * The caller needs to ensure that opp_table (and hence the regulator)
 	 * isn't freed, while we are executing this routine.
 	 */
-	ret = regulator_set_voltage_time(reg, min_uV, max_uV);
-	if (ret > 0)
-		latency_ns = ret * 1000;
+	for (i = 0; reg = regulators[i], i < count; i++) {
+		ret = regulator_set_voltage_time(reg, uV[i].min, uV[i].max);
+		if (ret > 0)
+			latency_ns += ret * 1000;
+	}
+
+free_uV:
+	kfree(uV);
+free_regulators:
+	kfree(regulators);
 
 	return latency_ns;
 }
@@ -542,8 +586,7 @@ unlock:
 }
 
 static int _set_opp_voltage(struct device *dev, struct regulator *reg,
-			    unsigned long u_volt, unsigned long u_volt_min,
-			    unsigned long u_volt_max)
+			    struct dev_pm_opp_supply *supply)
 {
 	int ret;
 
@@ -554,14 +597,78 @@ static int _set_opp_voltage(struct device *dev, struct regulator *reg,
 		return 0;
 	}
 
-	dev_dbg(dev, "%s: voltages (mV): %lu %lu %lu\n", __func__, u_volt_min,
-		u_volt, u_volt_max);
+	dev_dbg(dev, "%s: voltages (mV): %lu %lu %lu\n", __func__,
+		supply->u_volt_min, supply->u_volt, supply->u_volt_max);
 
-	ret = regulator_set_voltage_triplet(reg, u_volt_min, u_volt,
-					    u_volt_max);
+	ret = regulator_set_voltage_triplet(reg, supply->u_volt_min,
+					    supply->u_volt, supply->u_volt_max);
 	if (ret)
 		dev_err(dev, "%s: failed to set voltage (%lu %lu %lu mV): %d\n",
-			__func__, u_volt_min, u_volt, u_volt_max, ret);
+			__func__, supply->u_volt_min, supply->u_volt,
+			supply->u_volt_max, ret);
+
+	return ret;
+}
+
+static inline int
+_generic_set_opp_clk_only(struct device *dev, struct clk *clk,
+			  unsigned long old_freq, unsigned long freq)
+{
+	int ret;
+
+	ret = clk_set_rate(clk, freq);
+	if (ret) {
+		dev_err(dev, "%s: failed to set clock rate: %d\n", __func__,
+			ret);
+	}
+
+	return ret;
+}
+
+static int _generic_set_opp(struct dev_pm_set_opp_data *data)
+{
+	struct dev_pm_opp_supply *old_supply = data->old_opp.supplies;
+	struct dev_pm_opp_supply *new_supply = data->new_opp.supplies;
+	unsigned long old_freq = data->old_opp.rate, freq = data->new_opp.rate;
+	struct regulator *reg = data->regulators[0];
+	struct device *dev= data->dev;
+	int ret;
+
+	/* This function only supports single regulator per device */
+	if (WARN_ON(data->regulator_count > 1)) {
+		dev_err(dev, "multiple regulators are not supported\n");
+		return -EINVAL;
+	}
+
+	/* Scaling up? Scale voltage before frequency */
+	if (freq > old_freq) {
+		ret = _set_opp_voltage(dev, reg, new_supply);
+		if (ret)
+			goto restore_voltage;
+	}
+
+	/* Change frequency */
+	ret = _generic_set_opp_clk_only(dev, data->clk, old_freq, freq);
+	if (ret)
+		goto restore_voltage;
+
+	/* Scaling down? Scale voltage after frequency */
+	if (freq < old_freq) {
+		ret = _set_opp_voltage(dev, reg, new_supply);
+		if (ret)
+			goto restore_freq;
+	}
+
+	return 0;
+
+restore_freq:
+	if (_generic_set_opp_clk_only(dev, data->clk, freq, old_freq))
+		dev_err(dev, "%s: failed to restore old-freq (%lu Hz)\n",
+			__func__, old_freq);
+restore_voltage:
+	/* This shouldn't harm even if the voltages weren't updated earlier */
+	if (old_supply->u_volt)
+		_set_opp_voltage(dev, reg, old_supply);
 
 	return ret;
 }
@@ -579,13 +686,13 @@ static int _set_opp_voltage(struct device *dev, struct regulator *reg,
 int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq)
 {
 	struct opp_table *opp_table;
+	unsigned long freq, old_freq;
+	int (*set_opp)(struct dev_pm_set_opp_data *data);
 	struct dev_pm_opp *old_opp, *opp;
-	struct regulator *reg;
+	struct regulator **regulators;
+	struct dev_pm_set_opp_data *data;
 	struct clk *clk;
-	unsigned long freq, old_freq;
-	unsigned long u_volt, u_volt_min, u_volt_max;
-	unsigned long old_u_volt, old_u_volt_min, old_u_volt_max;
-	int ret;
+	int ret, size;
 
 	if (unlikely(!target_freq)) {
 		dev_err(dev, "%s: Invalid target frequency %lu\n", __func__,
@@ -634,64 +741,54 @@ int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq)
 		return ret;
 	}
 
-	if (IS_ERR(old_opp)) {
-		old_u_volt = 0;
-	} else {
-		old_u_volt = old_opp->u_volt;
-		old_u_volt_min = old_opp->u_volt_min;
-		old_u_volt_max = old_opp->u_volt_max;
-	}
-
-	u_volt = opp->u_volt;
-	u_volt_min = opp->u_volt_min;
-	u_volt_max = opp->u_volt_max;
-
-	reg = opp_table->regulator;
+	dev_dbg(dev, "%s: switching OPP: %lu Hz --> %lu Hz\n", __func__,
+		old_freq, freq);
 
-	rcu_read_unlock();
+	regulators = opp_table->regulators;
 
-	/* Scaling up? Scale voltage before frequency */
-	if (freq > old_freq) {
-		ret = _set_opp_voltage(dev, reg, u_volt, u_volt_min,
-				       u_volt_max);
-		if (ret)
-			goto restore_voltage;
-	}
+	/* Only frequency scaling */
+	if (!regulators) {
+		unsigned long u_volt = opp->supplies[0].u_volt;
 
-	/* Change frequency */
+		rcu_read_unlock();
 
-	dev_dbg(dev, "%s: switching OPP: %lu Hz --> %lu Hz\n",
-		__func__, old_freq, freq);
+		/*
+		 * DT contained supply ratings? Consider platform failed to set
+		 * regulators.
+		 */
+		if (unlikely(u_volt)) {
+			dev_err(dev, "%s: Regulator not registered with OPP core\n",
+				__func__);
+			return -EINVAL;
+		}
 
-	ret = clk_set_rate(clk, freq);
-	if (ret) {
-		dev_err(dev, "%s: failed to set clock rate: %d\n", __func__,
-			ret);
-		goto restore_voltage;
+		return _generic_set_opp_clk_only(dev, clk, old_freq, freq);
 	}
 
-	/* Scaling down? Scale voltage after frequency */
-	if (freq < old_freq) {
-		ret = _set_opp_voltage(dev, reg, u_volt, u_volt_min,
-				       u_volt_max);
-		if (ret)
-			goto restore_freq;
-	}
+	if (opp_table->set_opp)
+		set_opp = opp_table->set_opp;
+	else
+		set_opp = _generic_set_opp;
+
+	data = opp_table->set_opp_data;
+	data->regulators = regulators;
+	data->regulator_count = opp_table->regulator_count;
+	data->clk = clk;
+	data->dev = dev;
+
+	data->old_opp.rate = old_freq;
+	size = sizeof(*opp->supplies) * opp_table->regulator_count;
+	if (IS_ERR(old_opp))
+		memset(data->old_opp.supplies, 0, size);
+	else
+		memcpy(data->old_opp.supplies, old_opp->supplies, size);
 
-	return 0;
+	data->new_opp.rate = freq;
+	memcpy(data->new_opp.supplies, opp->supplies, size);
 
-restore_freq:
-	if (clk_set_rate(clk, old_freq))
-		dev_err(dev, "%s: failed to restore old-freq (%lu Hz)\n",
-			__func__, old_freq);
-restore_voltage:
-	/* This shouldn't harm even if the voltages weren't updated earlier */
-	if (old_u_volt) {
-		_set_opp_voltage(dev, reg, old_u_volt, old_u_volt_min,
-				 old_u_volt_max);
-	}
+	rcu_read_unlock();
 
-	return ret;
+	return set_opp(data);
 }
 EXPORT_SYMBOL_GPL(dev_pm_opp_set_rate);
 
@@ -774,9 +871,6 @@ static struct opp_table *_add_opp_table(struct device *dev)
 
 	_of_init_opp_table(opp_table, dev);
 
-	/* Set regulator to a non-NULL error value */
-	opp_table->regulator = ERR_PTR(-ENXIO);
-
 	/* Find clk for the device */
 	opp_table->clk = clk_get(dev, NULL);
 	if (IS_ERR(opp_table->clk)) {
@@ -825,7 +919,10 @@ static void _remove_opp_table(struct opp_table *opp_table)
 	if (opp_table->prop_name)
 		return;
 
-	if (!IS_ERR(opp_table->regulator))
+	if (opp_table->regulators)
+		return;
+
+	if (opp_table->set_opp)
 		return;
 
 	/* Release clk */
@@ -934,34 +1031,50 @@ struct dev_pm_opp *_allocate_opp(struct device *dev,
 				 struct opp_table **opp_table)
 {
 	struct dev_pm_opp *opp;
+	int count, supply_size;
+	struct opp_table *table;
 
-	/* allocate new OPP node */
-	opp = kzalloc(sizeof(*opp), GFP_KERNEL);
-	if (!opp)
+	table = _add_opp_table(dev);
+	if (!table)
 		return NULL;
 
-	INIT_LIST_HEAD(&opp->node);
+	/* Allocate space for at least one supply */
+	count = table->regulator_count ? table->regulator_count : 1;
+	supply_size = sizeof(*opp->supplies) * count;
 
-	*opp_table = _add_opp_table(dev);
-	if (!*opp_table) {
-		kfree(opp);
+	/* allocate new OPP node and supplies structures */
+	opp = kzalloc(sizeof(*opp) + supply_size, GFP_KERNEL);
+	if (!opp) {
+		kfree(table);
 		return NULL;
 	}
 
+	/* Put the supplies at the end of the OPP structure as an empty array */
+	opp->supplies = (struct dev_pm_opp_supply *)(opp + 1);
+	INIT_LIST_HEAD(&opp->node);
+
+	*opp_table = table;
+
 	return opp;
 }
 
 static bool _opp_supported_by_regulators(struct dev_pm_opp *opp,
 					 struct opp_table *opp_table)
 {
-	struct regulator *reg = opp_table->regulator;
-
-	if (!IS_ERR(reg) &&
-	    !regulator_is_supported_voltage(reg, opp->u_volt_min,
-					    opp->u_volt_max)) {
-		pr_warn("%s: OPP minuV: %lu maxuV: %lu, not supported by regulator\n",
-			__func__, opp->u_volt_min, opp->u_volt_max);
-		return false;
+	struct regulator *reg;
+	int i;
+
+	for (i = 0; i < opp_table->regulator_count; i++) {
+		reg = opp_table->regulators[i];
+
+		if (!regulator_is_supported_voltage(reg,
+					opp->supplies[i].u_volt_min,
+					opp->supplies[i].u_volt_max)) {
+			pr_warn("%s: OPP minuV: %lu maxuV: %lu, not supported by regulator\n",
+				__func__, opp->supplies[i].u_volt_min,
+				opp->supplies[i].u_volt_max);
+			return false;
+		}
 	}
 
 	return true;
@@ -993,11 +1106,13 @@ int _opp_add(struct device *dev, struct dev_pm_opp *new_opp,
 
 		/* Duplicate OPPs */
 		dev_warn(dev, "%s: duplicate OPPs detected. Existing: freq: %lu, volt: %lu, enabled: %d. New: freq: %lu, volt: %lu, enabled: %d\n",
-			 __func__, opp->rate, opp->u_volt, opp->available,
-			 new_opp->rate, new_opp->u_volt, new_opp->available);
+			 __func__, opp->rate, opp->supplies[0].u_volt,
+			 opp->available, new_opp->rate,
+			 new_opp->supplies[0].u_volt, new_opp->available);
 
-		return opp->available && new_opp->u_volt == opp->u_volt ?
-			0 : -EEXIST;
+		/* Should we compare voltages for all regulators here ? */
+		return opp->available &&
+		       new_opp->supplies[0].u_volt == opp->supplies[0].u_volt ? 0 : -EEXIST;
 	}
 
 	new_opp->opp_table = opp_table;
@@ -1064,9 +1179,9 @@ int _opp_add_v1(struct device *dev, unsigned long freq, long u_volt,
 	/* populate the opp table */
 	new_opp->rate = freq;
 	tol = u_volt * opp_table->voltage_tolerance_v1 / 100;
-	new_opp->u_volt = u_volt;
-	new_opp->u_volt_min = u_volt - tol;
-	new_opp->u_volt_max = u_volt + tol;
+	new_opp->supplies[0].u_volt = u_volt;
+	new_opp->supplies[0].u_volt_min = u_volt - tol;
+	new_opp->supplies[0].u_volt_max = u_volt + tol;
 	new_opp->available = true;
 	new_opp->dynamic = dynamic;
 
@@ -1310,13 +1425,47 @@ unlock:
 }
 EXPORT_SYMBOL_GPL(dev_pm_opp_put_prop_name);
 
+static int _allocate_set_opp_data(struct opp_table *opp_table)
+{
+	struct dev_pm_set_opp_data *data;
+	int len, count = opp_table->regulator_count;
+
+	if (WARN_ON(!count))
+		return -EINVAL;
+
+	/* space for set_opp_data */
+	len = sizeof(*data);
+
+	/* space for old_opp.supplies and new_opp.supplies */
+	len += 2 * sizeof(struct dev_pm_opp_supply) * count;
+
+	data = kzalloc(len, GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	data->old_opp.supplies = (void *)(data + 1);
+	data->new_opp.supplies = data->old_opp.supplies + count;
+
+	opp_table->set_opp_data = data;
+
+	return 0;
+}
+
+static void _free_set_opp_data(struct opp_table *opp_table)
+{
+	kfree(opp_table->set_opp_data);
+	opp_table->set_opp_data = NULL;
+}
+
 /**
- * dev_pm_opp_set_regulator() - Set regulator name for the device
+ * dev_pm_opp_set_regulators() - Set regulator names for the device
  * @dev: Device for which regulator name is being set.
- * @name: Name of the regulator.
+ * @names: Array of pointers to the names of the regulator.
+ * @count: Number of regulators.
  *
  * In order to support OPP switching, OPP layer needs to know the name of the
- * device's regulator, as the core would be required to switch voltages as well.
+ * device's regulators, as the core would be required to switch voltages as
+ * well.
  *
  * This must be called before any OPPs are initialized for the device.
  *
@@ -1326,11 +1475,12 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_put_prop_name);
  * that this function is *NOT* called under RCU protection or in contexts where
  * mutex cannot be locked.
  */
-struct opp_table *dev_pm_opp_set_regulator(struct device *dev, const char *name)
+int dev_pm_opp_set_regulators(struct device *dev, const char * const names[],
+			      unsigned int count)
 {
 	struct opp_table *opp_table;
 	struct regulator *reg;
-	int ret;
+	int ret, i;
 
 	mutex_lock(&opp_table_lock);
 
@@ -1346,38 +1496,62 @@ struct opp_table *dev_pm_opp_set_regulator(struct device *dev, const char *name)
 		goto err;
 	}
 
-	/* Already have a regulator set */
-	if (WARN_ON(!IS_ERR(opp_table->regulator))) {
+	/* Already have regulators set */
+	if (opp_table->regulators) {
 		ret = -EBUSY;
 		goto err;
 	}
-	/* Allocate the regulator */
-	reg = regulator_get_optional(dev, name);
-	if (IS_ERR(reg)) {
-		ret = PTR_ERR(reg);
-		if (ret != -EPROBE_DEFER)
-			dev_err(dev, "%s: no regulator (%s) found: %d\n",
-				__func__, name, ret);
+
+	opp_table->regulators = kmalloc_array(count,
+					      sizeof(*opp_table->regulators),
+					      GFP_KERNEL);
+	if (!opp_table->regulators) {
+		ret = -ENOMEM;
 		goto err;
 	}
 
-	opp_table->regulator = reg;
+	for (i = 0; i < count; i++) {
+		reg = regulator_get_optional(dev, names[i]);
+		if (IS_ERR(reg)) {
+			ret = PTR_ERR(reg);
+			if (ret != -EPROBE_DEFER)
+				dev_err(dev, "%s: regulator (%s) not found: %d\n",
+					__func__, names[i], ret);
+			goto free_regulators;
+		}
+
+		opp_table->regulators[i] = reg;
+	}
+
+	opp_table->regulator_count = count;
+
+	/* Allocate block only once to pass to set_opp() routines */
+	ret = _allocate_set_opp_data(opp_table);
+	if (ret)
+		goto free_regulators;
 
 	mutex_unlock(&opp_table_lock);
-	return opp_table;
+	return 0;
 
+free_regulators:
+	while (i != 0)
+		regulator_put(opp_table->regulators[--i]);
+
+	kfree(opp_table->regulators);
+	opp_table->regulators = NULL;
+	opp_table->regulator_count = 0;
 err:
 	_remove_opp_table(opp_table);
 unlock:
 	mutex_unlock(&opp_table_lock);
 
-	return ERR_PTR(ret);
+	return ret;
 }
-EXPORT_SYMBOL_GPL(dev_pm_opp_set_regulator);
+EXPORT_SYMBOL_GPL(dev_pm_opp_set_regulators);
 
 /**
- * dev_pm_opp_put_regulator() - Releases resources blocked for regulator
- * @opp_table: OPP table returned from dev_pm_opp_set_regulator().
+ * dev_pm_opp_put_regulators() - Releases resources blocked for regulators
+ * @dev: Device for which regulators were set.
  *
  * Locking: The internal opp_table and opp structures are RCU protected.
  * Hence this function internally uses RCU updater strategy with mutex locks
@@ -1385,20 +1559,140 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_set_regulator);
  * that this function is *NOT* called under RCU protection or in contexts where
  * mutex cannot be locked.
  */
-void dev_pm_opp_put_regulator(struct opp_table *opp_table)
+void dev_pm_opp_put_regulators(struct device *dev)
 {
+	struct opp_table *opp_table;
+	int i;
+
 	mutex_lock(&opp_table_lock);
 
-	if (IS_ERR(opp_table->regulator)) {
-		pr_err("%s: Doesn't have regulator set\n", __func__);
+	/* Check for existing table for 'dev' first */
+	opp_table = _find_opp_table(dev);
+	if (IS_ERR(opp_table)) {
+		dev_err(dev, "Failed to find opp_table: %ld\n",
+			PTR_ERR(opp_table));
+		goto unlock;
+	}
+
+	if (!opp_table->regulators) {
+		dev_err(dev, "%s: Doesn't have regulators set\n", __func__);
+		goto unlock;
+	}
+
+	/* Make sure there are no concurrent readers while updating opp_table */
+	WARN_ON(!list_empty(&opp_table->opp_list));
+
+	for (i = opp_table->regulator_count - 1; i >= 0; i--)
+		regulator_put(opp_table->regulators[i]);
+
+	_free_set_opp_data(opp_table);
+
+	kfree(opp_table->regulators);
+	opp_table->regulators = NULL;
+	opp_table->regulator_count = 0;
+
+	/* Try freeing opp_table if this was the last blocking resource */
+	_remove_opp_table(opp_table);
+
+unlock:
+	mutex_unlock(&opp_table_lock);
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_put_regulators);
+
+/**
+ * dev_pm_opp_register_set_opp_helper() - Register custom set OPP helper
+ * @dev: Device for which the helper is getting registered.
+ * @set_opp: Custom set OPP helper.
+ *
+ * This is useful to support complex platforms (like platforms with multiple
+ * regulators per device), instead of the generic OPP set rate helper.
+ *
+ * This must be called before any OPPs are initialized for the device.
+ *
+ * Locking: The internal opp_table and opp structures are RCU protected.
+ * Hence this function internally uses RCU updater strategy with mutex locks
+ * to keep the integrity of the internal data structures. Callers should ensure
+ * that this function is *NOT* called under RCU protection or in contexts where
+ * mutex cannot be locked.
+ */
+int dev_pm_opp_register_set_opp_helper(struct device *dev,
+			int (*set_opp)(struct dev_pm_set_opp_data *data))
+{
+	struct opp_table *opp_table;
+	int ret;
+
+	if (!set_opp)
+		return -EINVAL;
+
+	mutex_lock(&opp_table_lock);
+
+	opp_table = _add_opp_table(dev);
+	if (!opp_table) {
+		ret = -ENOMEM;
+		goto unlock;
+	}
+
+	/* This should be called before OPPs are initialized */
+	if (WARN_ON(!list_empty(&opp_table->opp_list))) {
+		ret = -EBUSY;
+		goto err;
+	}
+
+	/* Already have custom set_opp helper */
+	if (WARN_ON(opp_table->set_opp)) {
+		ret = -EBUSY;
+		goto err;
+	}
+
+	opp_table->set_opp = set_opp;
+
+	mutex_unlock(&opp_table_lock);
+	return 0;
+
+err:
+	_remove_opp_table(opp_table);
+unlock:
+	mutex_unlock(&opp_table_lock);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_register_set_opp_helper);
+
+/**
+ * dev_pm_opp_register_put_opp_helper() - Releases resources blocked for
+ *					   set_opp helper
+ * @dev: Device for which custom set_opp helper has to be cleared.
+ *
+ * Locking: The internal opp_table and opp structures are RCU protected.
+ * Hence this function internally uses RCU updater strategy with mutex locks
+ * to keep the integrity of the internal data structures. Callers should ensure
+ * that this function is *NOT* called under RCU protection or in contexts where
+ * mutex cannot be locked.
+ */
+void dev_pm_opp_register_put_opp_helper(struct device *dev)
+{
+	struct opp_table *opp_table;
+
+	mutex_lock(&opp_table_lock);
+
+	/* Check for existing table for 'dev' first */
+	opp_table = _find_opp_table(dev);
+	if (IS_ERR(opp_table)) {
+		dev_err(dev, "Failed to find opp_table: %ld\n",
+			PTR_ERR(opp_table));
+		goto unlock;
+	}
+
+	if (!opp_table->set_opp) {
+		dev_err(dev, "%s: Doesn't have custom set_opp helper set\n",
+			__func__);
 		goto unlock;
 	}
 
 	/* Make sure there are no concurrent readers while updating opp_table */
 	WARN_ON(!list_empty(&opp_table->opp_list));
 
-	regulator_put(opp_table->regulator);
-	opp_table->regulator = ERR_PTR(-ENXIO);
+	opp_table->set_opp = NULL;
 
 	/* Try freeing opp_table if this was the last blocking resource */
 	_remove_opp_table(opp_table);
@@ -1406,7 +1700,7 @@ void dev_pm_opp_put_regulator(struct opp_table *opp_table)
 unlock:
 	mutex_unlock(&opp_table_lock);
 }
-EXPORT_SYMBOL_GPL(dev_pm_opp_put_regulator);
+EXPORT_SYMBOL_GPL(dev_pm_opp_register_put_opp_helper);
 
 /**
  * dev_pm_opp_add()  - Add an OPP table from a table definitions
diff --git a/drivers/base/power/opp/debugfs.c b/drivers/base/power/opp/debugfs.c
index ef1ae6b..95f433d 100644
--- a/drivers/base/power/opp/debugfs.c
+++ b/drivers/base/power/opp/debugfs.c
@@ -15,6 +15,7 @@
 #include <linux/err.h>
 #include <linux/init.h>
 #include <linux/limits.h>
+#include <linux/slab.h>
 
 #include "opp.h"
 
@@ -34,6 +35,46 @@ void opp_debug_remove_one(struct dev_pm_opp *opp)
 	debugfs_remove_recursive(opp->dentry);
 }
 
+static bool opp_debug_create_supplies(struct dev_pm_opp *opp,
+				      struct opp_table *opp_table,
+				      struct dentry *pdentry)
+{
+	struct dentry *d;
+	int i = 0;
+	char *name;
+
+	/* Always create at least supply-0 directory */
+	do {
+		name = kasprintf(GFP_KERNEL, "supply-%d", i);
+
+		/* Create per-opp directory */
+		d = debugfs_create_dir(name, pdentry);
+
+		kfree(name);
+
+		if (!d)
+			return false;
+
+		if (!debugfs_create_ulong("u_volt_target", S_IRUGO, d,
+					  &opp->supplies[i].u_volt))
+			return false;
+
+		if (!debugfs_create_ulong("u_volt_min", S_IRUGO, d,
+					  &opp->supplies[i].u_volt_min))
+			return false;
+
+		if (!debugfs_create_ulong("u_volt_max", S_IRUGO, d,
+					  &opp->supplies[i].u_volt_max))
+			return false;
+
+		if (!debugfs_create_ulong("u_amp", S_IRUGO, d,
+					  &opp->supplies[i].u_amp))
+			return false;
+	} while (++i < opp_table->regulator_count);
+
+	return true;
+}
+
 int opp_debug_create_one(struct dev_pm_opp *opp, struct opp_table *opp_table)
 {
 	struct dentry *pdentry = opp_table->dentry;
@@ -63,16 +104,7 @@ int opp_debug_create_one(struct dev_pm_opp *opp, struct opp_table *opp_table)
 	if (!debugfs_create_ulong("rate_hz", S_IRUGO, d, &opp->rate))
 		return -ENOMEM;
 
-	if (!debugfs_create_ulong("u_volt_target", S_IRUGO, d, &opp->u_volt))
-		return -ENOMEM;
-
-	if (!debugfs_create_ulong("u_volt_min", S_IRUGO, d, &opp->u_volt_min))
-		return -ENOMEM;
-
-	if (!debugfs_create_ulong("u_volt_max", S_IRUGO, d, &opp->u_volt_max))
-		return -ENOMEM;
-
-	if (!debugfs_create_ulong("u_amp", S_IRUGO, d, &opp->u_amp))
+	if (!opp_debug_create_supplies(opp, opp_table, d))
 		return -ENOMEM;
 
 	if (!debugfs_create_ulong("clock_latency_ns", S_IRUGO, d,
diff --git a/drivers/base/power/opp/of.c b/drivers/base/power/opp/of.c
index 6937944..a9eefc9 100644
--- a/drivers/base/power/opp/of.c
+++ b/drivers/base/power/opp/of.c
@@ -17,6 +17,7 @@
 #include <linux/errno.h>
 #include <linux/device.h>
 #include <linux/of.h>
+#include <linux/slab.h>
 #include <linux/export.h>
 
 #include "opp.h"
@@ -101,16 +102,16 @@ static bool _opp_is_supported(struct device *dev, struct opp_table *opp_table,
 	return true;
 }
 
-/* TODO: Support multiple regulators */
 static int opp_parse_supplies(struct dev_pm_opp *opp, struct device *dev,
 			      struct opp_table *opp_table)
 {
-	u32 microvolt[3] = {0};
-	u32 val;
-	int count, ret;
+	u32 *microvolt, *microamp = NULL;
+	int supplies, vcount, icount, ret, i, j;
 	struct property *prop = NULL;
 	char name[NAME_MAX];
 
+	supplies = opp_table->regulator_count ? opp_table->regulator_count : 1;
+
 	/* Search for "opp-microvolt-<name>" */
 	if (opp_table->prop_name) {
 		snprintf(name, sizeof(name), "opp-microvolt-%s",
@@ -128,34 +129,29 @@ static int opp_parse_supplies(struct dev_pm_opp *opp, struct device *dev,
 			return 0;
 	}
 
-	count = of_property_count_u32_elems(opp->np, name);
-	if (count < 0) {
+	vcount = of_property_count_u32_elems(opp->np, name);
+	if (vcount < 0) {
 		dev_err(dev, "%s: Invalid %s property (%d)\n",
-			__func__, name, count);
-		return count;
+			__func__, name, vcount);
+		return vcount;
 	}
 
-	/* There can be one or three elements here */
-	if (count != 1 && count != 3) {
-		dev_err(dev, "%s: Invalid number of elements in %s property (%d)\n",
-			__func__, name, count);
+	/* There can be one or three elements per supply */
+	if (vcount != supplies && vcount != supplies * 3) {
+		dev_err(dev, "%s: Invalid number of elements in %s property (%d) with supplies (%d)\n",
+			__func__, name, vcount, supplies);
 		return -EINVAL;
 	}
 
-	ret = of_property_read_u32_array(opp->np, name, microvolt, count);
+	microvolt = kmalloc_array(vcount, sizeof(*microvolt), GFP_KERNEL);
+	if (!microvolt)
+		return -ENOMEM;
+
+	ret = of_property_read_u32_array(opp->np, name, microvolt, vcount);
 	if (ret) {
 		dev_err(dev, "%s: error parsing %s: %d\n", __func__, name, ret);
-		return -EINVAL;
-	}
-
-	opp->u_volt = microvolt[0];
-
-	if (count == 1) {
-		opp->u_volt_min = opp->u_volt;
-		opp->u_volt_max = opp->u_volt;
-	} else {
-		opp->u_volt_min = microvolt[1];
-		opp->u_volt_max = microvolt[2];
+		ret = -EINVAL;
+		goto free_microvolt;
 	}
 
 	/* Search for "opp-microamp-<name>" */
@@ -172,10 +168,59 @@ static int opp_parse_supplies(struct dev_pm_opp *opp, struct device *dev,
 		prop = of_find_property(opp->np, name, NULL);
 	}
 
-	if (prop && !of_property_read_u32(opp->np, name, &val))
-		opp->u_amp = val;
+	if (prop) {
+		icount = of_property_count_u32_elems(opp->np, name);
+		if (icount < 0) {
+			dev_err(dev, "%s: Invalid %s property (%d)\n", __func__,
+				name, icount);
+			ret = icount;
+			goto free_microvolt;
+		}
 
-	return 0;
+		if (icount != supplies) {
+			dev_err(dev, "%s: Invalid number of elements in %s property (%d) with supplies (%d)\n",
+				__func__, name, icount, supplies);
+			ret = -EINVAL;
+			goto free_microvolt;
+		}
+
+		microamp = kmalloc_array(icount, sizeof(*microamp), GFP_KERNEL);
+		if (!microamp) {
+			ret = -EINVAL;
+			goto free_microvolt;
+		}
+
+		ret = of_property_read_u32_array(opp->np, name, microamp,
+						 icount);
+		if (ret) {
+			dev_err(dev, "%s: error parsing %s: %d\n", __func__,
+				name, ret);
+			ret = -EINVAL;
+			goto free_microamp;
+		}
+	}
+
+	for (i = 0, j = 0; i < supplies; i++) {
+		opp->supplies[i].u_volt = microvolt[j++];
+
+		if (vcount == supplies) {
+			opp->supplies[i].u_volt_min = opp->supplies[i].u_volt;
+			opp->supplies[i].u_volt_max = opp->supplies[i].u_volt;
+		} else {
+			opp->supplies[i].u_volt_min = microvolt[j++];
+			opp->supplies[i].u_volt_max = microvolt[j++];
+		}
+
+		if (microamp)
+			opp->supplies[i].u_amp = microamp[i];
+	}
+
+free_microamp:
+	kfree(microamp);
+free_microvolt:
+	kfree(microvolt);
+
+	return ret;
 }
 
 /**
@@ -198,7 +243,7 @@ void dev_pm_opp_of_remove_table(struct device *dev)
 EXPORT_SYMBOL_GPL(dev_pm_opp_of_remove_table);
 
 /* Returns opp descriptor node for a device, caller must do of_node_put() */
-struct device_node *_of_get_opp_desc_node(struct device *dev)
+struct device_node *dev_pm_opp_of_get_opp_desc_node(struct device *dev)
 {
 	/*
 	 * TODO: Support for multiple OPP tables.
@@ -209,6 +254,7 @@ struct device_node *_of_get_opp_desc_node(struct device *dev)
 
 	return of_parse_phandle(dev->of_node, "operating-points-v2", 0);
 }
+EXPORT_SYMBOL_GPL(dev_pm_opp_of_get_opp_desc_node);
 
 /**
  * _opp_add_static_v2() - Allocate static OPPs (As per 'v2' DT bindings)
@@ -303,9 +349,9 @@ static int _opp_add_static_v2(struct device *dev, struct device_node *np)
 	mutex_unlock(&opp_table_lock);
 
 	pr_debug("%s: turbo:%d rate:%lu uv:%lu uvmin:%lu uvmax:%lu latency:%lu\n",
-		 __func__, new_opp->turbo, new_opp->rate, new_opp->u_volt,
-		 new_opp->u_volt_min, new_opp->u_volt_max,
-		 new_opp->clock_latency_ns);
+		 __func__, new_opp->turbo, new_opp->rate,
+		 new_opp->supplies[0].u_volt, new_opp->supplies[0].u_volt_min,
+		 new_opp->supplies[0].u_volt_max, new_opp->clock_latency_ns);
 
 	/*
 	 * Notify the changes in the availability of the operable
@@ -455,7 +501,7 @@ int dev_pm_opp_of_add_table(struct device *dev)
 	 * OPPs have two version of bindings now. The older one is deprecated,
 	 * try for the new binding first.
 	 */
-	opp_np = _of_get_opp_desc_node(dev);
+	opp_np = dev_pm_opp_of_get_opp_desc_node(dev);
 	if (!opp_np) {
 		/*
 		 * Try old-deprecated bindings for backward compatibility with
@@ -565,7 +611,7 @@ int dev_pm_opp_of_get_sharing_cpus(struct device *cpu_dev,
 	int cpu, ret = 0;
 
 	/* Get OPP descriptor node */
-	np = _of_get_opp_desc_node(cpu_dev);
+	np = dev_pm_opp_of_get_opp_desc_node(cpu_dev);
 	if (!np) {
 		dev_dbg(cpu_dev, "%s: Couldn't find cpu_dev node.\n", __func__);
 		return -ENOENT;
@@ -590,7 +636,7 @@ int dev_pm_opp_of_get_sharing_cpus(struct device *cpu_dev,
 		}
 
 		/* Get OPP descriptor node */
-		tmp_np = _of_get_opp_desc_node(tcpu_dev);
+		tmp_np = dev_pm_opp_of_get_opp_desc_node(tcpu_dev);
 		if (!tmp_np) {
 			dev_err(tcpu_dev, "%s: Couldn't find tcpu_dev node.\n",
 				__func__);
diff --git a/drivers/base/power/opp/opp.h b/drivers/base/power/opp/opp.h
index fabd5ca..af9f2b8 100644
--- a/drivers/base/power/opp/opp.h
+++ b/drivers/base/power/opp/opp.h
@@ -61,10 +61,7 @@ extern struct list_head opp_tables;
  * @turbo:	true if turbo (boost) OPP
  * @suspend:	true if suspend OPP
  * @rate:	Frequency in hertz
- * @u_volt:	Target voltage in microvolts corresponding to this OPP
- * @u_volt_min:	Minimum voltage in microvolts corresponding to this OPP
- * @u_volt_max:	Maximum voltage in microvolts corresponding to this OPP
- * @u_amp:	Maximum current drawn by the device in microamperes
+ * @supplies:	Power supplies voltage/current values
  * @clock_latency_ns: Latency (in nanoseconds) of switching to this OPP's
  *		frequency from any other OPP's frequency.
  * @opp_table:	points back to the opp_table struct this opp belongs to
@@ -83,10 +80,8 @@ struct dev_pm_opp {
 	bool suspend;
 	unsigned long rate;
 
-	unsigned long u_volt;
-	unsigned long u_volt_min;
-	unsigned long u_volt_max;
-	unsigned long u_amp;
+	struct dev_pm_opp_supply *supplies;
+
 	unsigned long clock_latency_ns;
 
 	struct opp_table *opp_table;
@@ -144,7 +139,10 @@ enum opp_table_access {
  * @supported_hw_count: Number of elements in supported_hw array.
  * @prop_name: A name to postfix to many DT properties, while parsing them.
  * @clk: Device's clock handle
- * @regulator: Supply regulator
+ * @regulators: Supply regulators
+ * @regulator_count: Number of power supply regulators
+ * @set_opp: Platform specific set_opp callback
+ * @set_opp_data: Data to be passed to set_opp callback
  * @dentry:	debugfs dentry pointer of the real device directory (not links).
  * @dentry_name: Name of the real dentry.
  *
@@ -179,7 +177,11 @@ struct opp_table {
 	unsigned int supported_hw_count;
 	const char *prop_name;
 	struct clk *clk;
-	struct regulator *regulator;
+	struct regulator **regulators;
+	unsigned int regulator_count;
+
+	int (*set_opp)(struct dev_pm_set_opp_data *data);
+	struct dev_pm_set_opp_data *set_opp_data;
 
 #ifdef CONFIG_DEBUG_FS
 	struct dentry *dentry;
@@ -190,7 +192,6 @@ struct opp_table {
 /* Routines internal to opp core */
 struct opp_table *_find_opp_table(struct device *dev);
 struct opp_device *_add_opp_dev(const struct device *dev, struct opp_table *opp_table);
-struct device_node *_of_get_opp_desc_node(struct device *dev);
 void _dev_pm_opp_remove_table(struct device *dev, bool remove_all);
 struct dev_pm_opp *_allocate_opp(struct device *dev, struct opp_table **opp_table);
 int _opp_add(struct device *dev, struct dev_pm_opp *new_opp, struct opp_table *opp_table);
diff --git a/drivers/bus/Kconfig b/drivers/bus/Kconfig
index 7875105..bbd5afd 100644
--- a/drivers/bus/Kconfig
+++ b/drivers/bus/Kconfig
@@ -120,7 +120,6 @@ config QCOM_EBI2
 config SIMPLE_PM_BUS
 	bool "Simple Power-Managed Bus Driver"
 	depends on OF && PM
-	depends on ARCH_RENESAS || COMPILE_TEST
 	help
 	  Driver for transparent busses that don't need a real driver, but
 	  where the bus controller is part of a PM domain, or under the control
diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm
index bc3917d..a7b3795 100644
--- a/drivers/cpufreq/Kconfig.arm
+++ b/drivers/cpufreq/Kconfig.arm
@@ -234,6 +234,17 @@ config ARM_TEGRA124_CPUFREQ
 	help
 	  This adds the CPUFreq driver support for Tegra124 SOCs.
 
+config ARM_TI_CPUFREQ
+	bool "Texas Instruments CPUFreq support"
+	depends on ARCH_OMAP2PLUS
+	help
+	  This driver enables valid OPPs on the running platform based on
+	  values contained within the SoC in use. Enable this in order to
+	  use the cpufreq-dt driver on all Texas Instruments platforms that
+	  provide dt based operating-points-v2 tables with opp-supported-hw
+	  data provided. Required for cpufreq support on AM335x, AM437x,
+	  DRA7x, and AM57x platforms.
+
 config ARM_PXA2xx_CPUFREQ
 	tristate "Intel PXA2xx CPUfreq driver"
 	depends on PXA27x || PXA25x
diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile
index 0a9b6a0..5b1b6ec 100644
--- a/drivers/cpufreq/Makefile
+++ b/drivers/cpufreq/Makefile
@@ -77,6 +77,7 @@ obj-$(CONFIG_ARM_SPEAR_CPUFREQ)		+= spear-cpufreq.o
 obj-$(CONFIG_ARM_STI_CPUFREQ)		+= sti-cpufreq.o
 obj-$(CONFIG_ARM_TEGRA20_CPUFREQ)	+= tegra20-cpufreq.o
 obj-$(CONFIG_ARM_TEGRA124_CPUFREQ)	+= tegra124-cpufreq.o
+obj-$(CONFIG_ARM_TI_CPUFREQ)		+= ti-cpufreq.o
 obj-$(CONFIG_ARM_VEXPRESS_SPC_CPUFREQ)	+= vexpress-spc-cpufreq.o
 obj-$(CONFIG_ACPI_CPPC_CPUFREQ) += cppc_cpufreq.o
 obj-$(CONFIG_MACH_MVEBU_V7)		+= mvebu-cpufreq.o
diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c
index 7126762..d2637e1 100644
--- a/drivers/cpufreq/cpufreq-dt-platdev.c
+++ b/drivers/cpufreq/cpufreq-dt-platdev.c
@@ -72,8 +72,6 @@ static const struct of_device_id machines[] __initconst = {
 
 	{ .compatible = "sigma,tango4" },
 
-	{ .compatible = "ti,am33xx", },
-	{ .compatible = "ti,dra7", },
 	{ .compatible = "ti,omap2", },
 	{ .compatible = "ti,omap3", },
 	{ .compatible = "ti,omap4", },
diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c
index 4d3ec92..15cb261 100644
--- a/drivers/cpufreq/cpufreq-dt.c
+++ b/drivers/cpufreq/cpufreq-dt.c
@@ -28,7 +28,6 @@
 #include "cpufreq-dt.h"
 
 struct private_data {
-	struct opp_table *opp_table;
 	struct device *cpu_dev;
 	struct thermal_cooling_device *cdev;
 	const char *reg_name;
@@ -144,7 +143,6 @@ static int resources_available(void)
 static int cpufreq_init(struct cpufreq_policy *policy)
 {
 	struct cpufreq_frequency_table *freq_table;
-	struct opp_table *opp_table = NULL;
 	struct private_data *priv;
 	struct device *cpu_dev;
 	struct clk *cpu_clk;
@@ -188,9 +186,11 @@ static int cpufreq_init(struct cpufreq_policy *policy)
 	 */
 	name = find_supply_name(cpu_dev);
 	if (name) {
-		opp_table = dev_pm_opp_set_regulator(cpu_dev, name);
-		if (IS_ERR(opp_table)) {
-			ret = PTR_ERR(opp_table);
+		const char *names[] = {name};
+
+		ret = dev_pm_opp_set_regulators(cpu_dev, names,
+						ARRAY_SIZE(names));
+		if (ret) {
 			dev_err(cpu_dev, "Failed to set regulator for cpu%d: %d\n",
 				policy->cpu, ret);
 			goto out_put_clk;
@@ -240,7 +240,6 @@ static int cpufreq_init(struct cpufreq_policy *policy)
 	}
 
 	priv->reg_name = name;
-	priv->opp_table = opp_table;
 
 	ret = dev_pm_opp_init_cpufreq_table(cpu_dev, &freq_table);
 	if (ret) {
@@ -289,7 +288,7 @@ out_free_priv:
 out_free_opp:
 	dev_pm_opp_of_cpumask_remove_table(policy->cpus);
 	if (name)
-		dev_pm_opp_put_regulator(opp_table);
+		dev_pm_opp_put_regulators(cpu_dev);
 out_put_clk:
 	clk_put(cpu_clk);
 
@@ -304,7 +303,7 @@ static int cpufreq_exit(struct cpufreq_policy *policy)
 	dev_pm_opp_free_cpufreq_table(priv->cpu_dev, &policy->freq_table);
 	dev_pm_opp_of_cpumask_remove_table(policy->related_cpus);
 	if (priv->reg_name)
-		dev_pm_opp_put_regulator(priv->opp_table);
+		dev_pm_opp_put_regulators(priv->cpu_dev);
 
 	clk_put(policy->clk);
 	kfree(priv);
diff --git b/drivers/cpufreq/ti-cpufreq.c b/drivers/cpufreq/ti-cpufreq.c
new file mode 100644
index 0000000..afbaef9
--- /dev/null
+++ b/drivers/cpufreq/ti-cpufreq.c
@@ -0,0 +1,288 @@
+/*
+ * TI CPUFreq/OPP hw-supported driver
+ *
+ * Copyright (C) 2016 Texas Instruments, Inc.
+ *	 Dave Gerlach <d-gerlach@ti.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/cpu.h>
+#include <linux/io.h>
+#include <linux/mfd/syscon.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/pm_opp.h>
+#include <linux/regmap.h>
+#include <linux/slab.h>
+
+#define REVISION_MASK				0xF
+#define REVISION_SHIFT				28
+
+#define AM33XX_800M_ARM_MPU_MAX_FREQ		0x1E2F
+#define AM43XX_600M_ARM_MPU_MAX_FREQ		0xFFA
+
+#define DRA7_EFUSE_HAS_OD_MPU_OPP		11
+#define DRA7_EFUSE_HAS_HIGH_MPU_OPP		15
+#define DRA7_EFUSE_HAS_ALL_MPU_OPP		23
+
+#define DRA7_EFUSE_NOM_MPU_OPP			BIT(0)
+#define DRA7_EFUSE_OD_MPU_OPP			BIT(1)
+#define DRA7_EFUSE_HIGH_MPU_OPP			BIT(2)
+
+#define VERSION_COUNT				2
+
+struct ti_cpufreq_data;
+
+struct ti_cpufreq_soc_data {
+	unsigned long (*efuse_xlate)(struct ti_cpufreq_data *opp_data,
+				     unsigned long efuse);
+	unsigned long efuse_fallback;
+};
+
+struct ti_cpufreq_data {
+	struct device *cpu_dev;
+	struct device_node *opp_node;
+	struct regmap *opp_efuse;
+	struct regmap *revision;
+	const struct ti_cpufreq_soc_data *soc_data;
+};
+
+static unsigned long amx3_efuse_xlate(struct ti_cpufreq_data *opp_data,
+				      unsigned long efuse)
+{
+	if (!efuse)
+		efuse = opp_data->soc_data->efuse_fallback;
+	/* AM335x and AM437x use "OPP disable" bits, so invert */
+	return ~efuse;
+}
+
+static unsigned long dra7_efuse_xlate(struct ti_cpufreq_data *opp_data,
+				      unsigned long efuse)
+{
+	unsigned long calculated_efuse = DRA7_EFUSE_NOM_MPU_OPP;
+
+	/*
+	 * The efuse on dra7 and am57 parts contains a specific
+	 * value indicating the highest available OPP.
+	 */
+
+	switch (efuse) {
+	case DRA7_EFUSE_HAS_ALL_MPU_OPP:
+	case DRA7_EFUSE_HAS_HIGH_MPU_OPP:
+		calculated_efuse |= DRA7_EFUSE_HIGH_MPU_OPP;
+	case DRA7_EFUSE_HAS_OD_MPU_OPP:
+		calculated_efuse |= DRA7_EFUSE_OD_MPU_OPP;
+	}
+
+	return calculated_efuse;
+}
+
+static struct ti_cpufreq_soc_data am3x_soc_data = {
+	.efuse_xlate = amx3_efuse_xlate,
+	.efuse_fallback = AM33XX_800M_ARM_MPU_MAX_FREQ,
+};
+
+static struct ti_cpufreq_soc_data am4x_soc_data = {
+	.efuse_xlate = amx3_efuse_xlate,
+	.efuse_fallback = AM43XX_600M_ARM_MPU_MAX_FREQ,
+};
+
+static struct ti_cpufreq_soc_data dra7_soc_data = {
+	.efuse_xlate = dra7_efuse_xlate,
+};
+
+/**
+ * ti_cpufreq_get_efuse() - Parse and return efuse value present on SoC
+ * @opp_data: pointer to ti_cpufreq_data context
+ * @efuse_value: Set to the value parsed from efuse
+ *
+ * Returns error code if efuse not read properly.
+ */
+static int ti_cpufreq_get_efuse(struct ti_cpufreq_data *opp_data,
+				u32 *efuse_value)
+{
+	struct device *dev = opp_data->cpu_dev;
+	struct device_node *np = opp_data->opp_node;
+	unsigned int efuse_offset;
+	u32 efuse, efuse_mask, efuse_shift, vals[4];
+	int ret;
+
+	ret = of_property_read_u32_array(np, "ti,syscon-efuse", vals, 4);
+	if (ret) {
+		dev_err(dev, "ti,syscon-efuse cannot be read %s: %d\n",
+			np->full_name, ret);
+		return ret;
+	}
+
+	efuse_offset = vals[1];
+	efuse_mask = vals[2];
+	efuse_shift = vals[3];
+
+	ret = regmap_read(opp_data->opp_efuse, efuse_offset, &efuse);
+	if (ret) {
+		dev_err(dev,
+			"Failed to read the efuse value from syscon: %d\n",
+			ret);
+		return ret;
+	}
+
+	efuse = (efuse & efuse_mask) >> efuse_shift;
+
+	*efuse_value = opp_data->soc_data->efuse_xlate(opp_data, efuse);
+
+	return 0;
+}
+
+/**
+ * ti_cpufreq_get_rev() - Parse and return rev value present on SoC
+ * @opp_data: pointer to ti_cpufreq_data context
+ * @revision_value: Set to the value parsed from revision register
+ *
+ * Returns error code if revision not read properly.
+ */
+static int ti_cpufreq_get_rev(struct ti_cpufreq_data *opp_data,
+			      u32 *revision_value)
+{
+	struct device *dev = opp_data->cpu_dev;
+	struct device_node *np = opp_data->opp_node;
+	unsigned int revision_offset;
+	u32 revision;
+	int ret;
+
+	ret = of_property_read_u32_index(np, "ti,syscon-rev",
+					 1, &revision_offset);
+	if (ret) {
+		dev_err(dev,
+			"No revision offset provided %s [%d]\n",
+			np->full_name, ret);
+		return ret;
+	}
+
+	ret = regmap_read(opp_data->revision, revision_offset, &revision);
+	if (ret) {
+		dev_err(dev,
+			"Failed to read the revision number from syscon: %d\n",
+			ret);
+		return ret;
+	}
+
+	*revision_value = BIT((revision >> REVISION_SHIFT) & REVISION_MASK);
+
+	return 0;
+}
+
+static int ti_cpufreq_setup_syscon_registers(struct ti_cpufreq_data *opp_data)
+{
+	struct device *dev = opp_data->cpu_dev;
+	struct device_node *np = opp_data->opp_node;
+
+	opp_data->opp_efuse = syscon_regmap_lookup_by_phandle(np,
+							"ti,syscon-efuse");
+	if (IS_ERR(opp_data->opp_efuse)) {
+		dev_err(dev,
+			"\"ti,syscon-efuse\" is missing, cannot use OPPv2 table.\n");
+		return PTR_ERR(opp_data->opp_efuse);
+	}
+
+	opp_data->revision = syscon_regmap_lookup_by_phandle(np,
+							"ti,syscon-rev");
+	if (IS_ERR(opp_data->revision)) {
+		dev_err(dev,
+			"\"ti,syscon-rev\" is missing, cannot use OPPv2 table.\n");
+		return PTR_ERR(opp_data->revision);
+	}
+
+	return 0;
+}
+
+static const struct of_device_id ti_cpufreq_of_match[] = {
+	{ .compatible = "ti,am33xx", .data = &am3x_soc_data, },
+	{ .compatible = "ti,am4372", .data = &am4x_soc_data, },
+	{ .compatible = "ti,dra7", .data = &dra7_soc_data },
+	{},
+};
+
+static int ti_cpufreq_init(void)
+{
+	u32 version[VERSION_COUNT];
+	struct device_node *np;
+	const struct of_device_id *match;
+	struct ti_cpufreq_data *opp_data;
+	int ret;
+
+	np = of_find_node_by_path("/");
+	match = of_match_node(ti_cpufreq_of_match, np);
+	if (!match)
+		return -ENODEV;
+
+	opp_data = kzalloc(sizeof(*opp_data), GFP_KERNEL);
+	if (!opp_data)
+		return -ENOMEM;
+
+	opp_data->soc_data = match->data;
+
+	opp_data->cpu_dev = get_cpu_device(0);
+	if (!opp_data->cpu_dev) {
+		pr_err("%s: Failed to get device for CPU0\n", __func__);
+		return -ENODEV;
+	}
+
+	opp_data->opp_node = dev_pm_opp_of_get_opp_desc_node(opp_data->cpu_dev);
+	if (!opp_data->opp_node) {
+		dev_info(opp_data->cpu_dev,
+			 "OPP-v2 not supported, cpufreq-dt will attempt to use legacy tables.\n");
+		goto register_cpufreq_dt;
+	}
+
+	ret = ti_cpufreq_setup_syscon_registers(opp_data);
+	if (ret)
+		goto fail_put_node;
+
+	/*
+	 * OPPs determine whether or not they are supported based on
+	 * two metrics:
+	 *	0 - SoC Revision
+	 *	1 - eFuse value
+	 */
+	ret = ti_cpufreq_get_rev(opp_data, &version[0]);
+	if (ret)
+		goto fail_put_node;
+
+	ret = ti_cpufreq_get_efuse(opp_data, &version[1]);
+	if (ret)
+		goto fail_put_node;
+
+	of_node_put(opp_data->opp_node);
+
+	ret = dev_pm_opp_set_supported_hw(opp_data->cpu_dev, version,
+					  VERSION_COUNT);
+	if (ret) {
+		dev_err(opp_data->cpu_dev,
+			"Failed to set supported hardware\n");
+		goto fail_put_node;
+	}
+
+register_cpufreq_dt:
+	platform_device_register_simple("cpufreq-dt", -1, NULL, 0);
+
+	return 0;
+
+fail_put_node:
+	of_node_put(opp_data->opp_node);
+
+	return ret;
+}
+module_init(ti_cpufreq_init);
+
+MODULE_DESCRIPTION("TI CPUFreq/OPP hw-supported driver");
+MODULE_AUTHOR("Dave Gerlach <d-gerlach@ti.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
index 12d417a..ac953db 100644
--- a/drivers/gpio/Kconfig
+++ b/drivers/gpio/Kconfig
@@ -61,6 +61,20 @@ config GPIO_SYSFS
 	  Kernel drivers may also request that a particular GPIO be
 	  exported to userspace; this can be useful when debugging.
 
+config GPIO_OF_HELPER
+	bool "GPIO OF helper device (EXPERIMENTAL)"
+	depends on OF_GPIO
+	help
+	  Say Y here to add an GPIO OF helper driver
+
+	  Allows you specify a GPIO helper based on OF
+	  which allows simple export of GPIO functionality
+	  in user-space.
+
+	  Features include, value set/get, direction control,
+	  interrupt/value change poll support, event counting
+	  and others.
+
 config GPIO_GENERIC
 	depends on HAS_IOMEM # Only for IOMEM drivers
 	tristate
diff --git a/drivers/gpio/Makefile b/drivers/gpio/Makefile
index d074c22..e4e13b8 100644
--- a/drivers/gpio/Makefile
+++ b/drivers/gpio/Makefile
@@ -8,6 +8,7 @@ obj-$(CONFIG_GPIOLIB)		+= gpiolib-legacy.o
 obj-$(CONFIG_OF_GPIO)		+= gpiolib-of.o
 obj-$(CONFIG_GPIO_SYSFS)	+= gpiolib-sysfs.o
 obj-$(CONFIG_GPIO_ACPI)		+= gpiolib-acpi.o
+obj-$(CONFIG_GPIO_OF_HELPER)	+= gpio-of-helper.o
 
 # Device drivers. Generally keep list sorted alphabetically
 obj-$(CONFIG_GPIO_GENERIC)	+= gpio-generic.o
diff --git b/drivers/gpio/gpio-of-helper.c b/drivers/gpio/gpio-of-helper.c
new file mode 100644
index 0000000..83f362f
--- /dev/null
+++ b/drivers/gpio/gpio-of-helper.c
@@ -0,0 +1,435 @@
+/*
+ * GPIO OF based helper
+ *
+ * A simple DT based driver to provide access to GPIO functionality
+ * to user-space via sysfs.
+ *
+ * Copyright (C) 2013 Pantelis Antoniou <panto@antoniou-consulting.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/timer.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/bitops.h>
+#include <linux/err.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/of_gpio.h>
+#include <linux/pinctrl/pinctrl.h>
+#include <linux/pinctrl/pinmux.h>
+#include <linux/pinctrl/consumer.h>
+#include <linux/atomic.h>
+#include <linux/clk.h>
+#include <linux/interrupt.h>
+#include <linux/math64.h>
+#include <linux/atomic.h>
+#include <linux/idr.h>
+
+/* fwd decl. */
+struct gpio_of_helper_info;
+
+enum gpio_type {
+	GPIO_TYPE_INPUT = 0,
+	GPIO_TYPE_OUTPUT = 1,
+};
+
+struct gpio_of_entry {
+	int id;
+	struct gpio_of_helper_info *info;
+	struct device_node *node;
+	enum gpio_type type;
+	int gpio;
+	int irq;
+	const char *name;
+	atomic64_t counter;
+	unsigned int count_flags;
+#define COUNT_RISING_EDGE	(1 << 0)
+#define COUNT_FALLING_EDGE	(1 << 1)
+};
+
+struct gpio_of_helper_info {
+	struct platform_device *pdev;
+	struct idr idr;
+};
+
+static const struct of_device_id gpio_of_helper_of_match[] = {
+	{
+		.compatible = "gpio-of-helper",
+	},
+	{ },
+};
+MODULE_DEVICE_TABLE(of, gpio_of_helper_of_match);
+
+static ssize_t gpio_of_helper_show_status(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct gpio_of_helper_info *info = platform_get_drvdata(pdev);
+	struct gpio_of_entry *entry;
+	char *p, *e;
+	int id, n;
+
+	p = buf;
+	e = p + PAGE_SIZE;
+	n = 0;
+	idr_for_each_entry(&info->idr, entry, id) {
+		switch (entry->type) {
+		case GPIO_TYPE_INPUT:
+			n = snprintf(p, e - p, "%2d %-24s %3d %-3s %llu\n",
+				entry->id, entry->name, entry->gpio, "IN",
+				(unsigned long long)
+					atomic64_read(&entry->counter));
+			break;
+		case GPIO_TYPE_OUTPUT:
+			n = snprintf(p, e - p, "%2d %-24s %3d %-3s\n",
+				entry->id, entry->name, entry->gpio, "OUT");
+			break;
+		}
+		p += n;
+	}
+
+	return p - buf;
+}
+
+static DEVICE_ATTR(status, S_IRUGO,
+		gpio_of_helper_show_status, NULL);
+
+static irqreturn_t gpio_of_helper_handler(int irq, void *ptr)
+{
+	struct gpio_of_entry *entry = ptr;
+
+	/* caution - low speed interfaces only! */
+	atomic64_inc(&entry->counter);
+
+	return IRQ_HANDLED;
+}
+
+static struct gpio_of_entry *
+gpio_of_entry_create(struct gpio_of_helper_info *info,
+		struct device_node *node)
+{
+	struct platform_device *pdev = info->pdev;
+	struct device *dev = &pdev->dev;
+	struct gpio_of_entry *entry;
+	int err, gpio, irq;
+	unsigned int req_flags, count_flags, irq_flags;
+	enum gpio_type type;
+	enum of_gpio_flags gpio_flags;
+	const char *name;
+
+	/* get the type of the node first */
+	if (of_property_read_bool(node, "input"))
+		type = GPIO_TYPE_INPUT;
+	else if (of_property_read_bool(node, "output")
+			|| of_property_read_bool(node, "init-low")
+			|| of_property_read_bool(node, "init-high"))
+		type = GPIO_TYPE_OUTPUT;
+	else {
+		dev_err(dev, "Not valid gpio node type\n");
+		err = -EINVAL;
+		goto err_bad_node;
+	}
+
+	/* get the name */
+	if (of_property_read_string(node, "line-name", &name))
+		if (of_property_read_string(node, "gpio-name", &name))
+			name = node->name;
+
+	err = of_get_named_gpio_flags(node, "gpio", 0, &gpio_flags);
+	if (IS_ERR_VALUE(err)) {
+		dev_err(dev, "Failed to get gpio property of '%s'\n", name);
+		goto err_bad_node;
+	}
+	gpio = err;
+
+	req_flags = 0;
+	count_flags = 0;
+
+	/* set the request flags */
+	switch (type) {
+		case GPIO_TYPE_INPUT:
+			req_flags = GPIOF_DIR_IN | GPIOF_EXPORT;
+			if (of_property_read_bool(node, "count-falling-edge"))
+				count_flags |= COUNT_FALLING_EDGE;
+			if (of_property_read_bool(node, "count-rising-edge"))
+				count_flags |= COUNT_RISING_EDGE;
+			break;
+		case GPIO_TYPE_OUTPUT:
+			req_flags = GPIOF_DIR_OUT | GPIOF_EXPORT;
+			if (of_property_read_bool(node, "init-high"))
+				req_flags |= GPIOF_OUT_INIT_HIGH;
+			else if (of_property_read_bool(node, "init-low"))
+				req_flags |= GPIOF_OUT_INIT_LOW;
+			break;
+	}
+	if (of_property_read_bool(node, "dir-changeable"))
+		req_flags |= GPIOF_EXPORT_CHANGEABLE;
+	if (gpio_flags & OF_GPIO_ACTIVE_LOW)
+		req_flags |= GPIOF_ACTIVE_LOW;
+	if (gpio_flags & OF_GPIO_SINGLE_ENDED) {
+		if (gpio_flags & OF_GPIO_ACTIVE_LOW)
+			req_flags |= GPIOF_OPEN_DRAIN;
+		else
+			req_flags |= GPIOF_OPEN_SOURCE;
+	}
+
+	/* request the gpio */
+	err = devm_gpio_request_one(dev, gpio, req_flags, name);
+	if (err != 0) {
+		dev_err(dev, "Failed to request gpio '%s'\n", name);
+		goto err_bad_node;
+	}
+
+	irq = -1;
+	irq_flags = 0;
+
+	/* counter mode requested - need an interrupt */
+	if (count_flags != 0) {
+		irq = gpio_to_irq(gpio);
+		if (IS_ERR_VALUE(irq)) {
+			dev_err(dev, "Failed to request gpio '%s'\n", name);
+			goto err_bad_node;
+		}
+
+		if (count_flags & COUNT_RISING_EDGE)
+			irq_flags |= IRQF_TRIGGER_RISING;
+		if (count_flags & COUNT_FALLING_EDGE)
+			irq_flags |= IRQF_TRIGGER_FALLING;
+	}
+
+//	if (!idr_pre_get(&info->idr, GFP_KERNEL)) {
+//		dev_err(dev, "Failed on idr_pre_get of '%s'\n", name);
+//		err = -ENOMEM;
+//		goto err_no_mem;
+//	}
+
+	idr_preload(GFP_KERNEL);
+
+	entry = devm_kzalloc(dev, sizeof(*entry), GFP_KERNEL);
+	if (entry == NULL) {
+		dev_err(dev, "Failed to allocate gpio entry of '%s'\n", name);
+		err = -ENOMEM;
+		goto err_no_mem;
+	}
+
+	entry->id = -1;
+	entry->info = info;
+	entry->node = of_node_get(node);	/* get node reference */
+	entry->type = type;
+	entry->gpio = gpio;
+	entry->irq = irq;
+	entry->name = name;
+
+	/* interrupt enable is last thing done */
+	if (irq >= 0) {
+		atomic64_set(&entry->counter, 0);
+		entry->count_flags = count_flags;
+		err = devm_request_irq(dev, irq, gpio_of_helper_handler,
+				irq_flags, name, entry);
+		if (err != 0) {
+			dev_err(dev, "Failed to request irq of '%s'\n", name);
+			goto err_no_irq;
+		}
+	}
+
+	/* all done; insert */
+//	err = idr_get_new(&info->idr, entry, &entry->id);
+//	if (IS_ERR_VALUE(err)) {
+//		dev_err(dev, "Failed to idr_get_new  of '%s'\n", name);
+//		goto err_fail_idr;
+//	}
+
+	err = idr_alloc(&info->idr, entry, 0, 0, GFP_NOWAIT);
+	if (err >= 0)
+		entry->id = err;
+
+	idr_preload_end();
+
+	if (err < 0) {
+		dev_err(dev, "Failed to idr_get_new  of '%s'\n", name);
+		goto err_fail_idr;
+	}
+
+	dev_dbg(dev, "Allocated GPIO id=%d name='%s'\n", entry->id, name);
+
+	return entry;
+
+err_fail_idr:
+	/* nothing to do */
+err_no_irq:
+	/* release node ref */
+	of_node_put(node);
+	/* nothing else needs to be done, devres handles it */
+err_no_mem:
+err_bad_node:
+	return ERR_PTR(err);
+}
+
+static int gpio_of_entry_destroy(struct gpio_of_entry *entry)
+{
+	struct gpio_of_helper_info *info = entry->info;
+	struct platform_device *pdev = info->pdev;
+	struct device *dev = &pdev->dev;
+
+	dev_dbg(dev, "Destroying GPIO id=%d\n", entry->id);
+
+	/* remove from the IDR */
+	idr_remove(&info->idr, entry->id);
+
+	/* remove node ref */
+	of_node_put(entry->node);
+
+	/* free gpio */
+	devm_gpio_free(dev, entry->gpio);
+
+	/* gree irq */
+	if (entry->irq >= 0)
+		devm_free_irq(dev, entry->irq, entry);
+
+	/* and free */
+	devm_kfree(dev, entry);
+
+	return 0;
+}
+
+static int gpio_of_helper_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct gpio_of_helper_info *info;
+	struct gpio_of_entry *entry;
+	struct device_node *pnode = pdev->dev.of_node;
+	struct device_node *cnode;
+	struct pinctrl *pinctrl;
+	int err;
+
+	/* we only support OF */
+	if (pnode == NULL) {
+		dev_err(&pdev->dev, "No platform of_node!\n");
+		return -ENODEV;
+	}
+
+	pinctrl = devm_pinctrl_get_select_default(&pdev->dev);
+	if (IS_ERR(pinctrl)) {
+		/* special handling for probe defer */
+		if (PTR_ERR(pinctrl) == -EPROBE_DEFER)
+			return -EPROBE_DEFER;
+
+		dev_warn(&pdev->dev,
+			"pins are not configured from the driver\n");
+	}
+
+	info = devm_kzalloc(&pdev->dev, sizeof(*info), GFP_KERNEL);
+	if (info == NULL) {
+		dev_err(&pdev->dev, "Failed to allocate info\n");
+		err = -ENOMEM;
+		goto err_no_mem;
+	}
+	platform_set_drvdata(pdev, info);
+	info->pdev = pdev;
+
+	idr_init(&info->idr);
+
+	err = device_create_file(dev, &dev_attr_status);
+	if (err != 0) {
+		dev_err(dev, "Failed to create status sysfs attribute\n");
+		goto err_no_sysfs;
+	}
+
+	for_each_child_of_node(pnode, cnode) {
+
+		entry = gpio_of_entry_create(info, cnode);
+		if (IS_ERR_OR_NULL(entry)) {
+			dev_err(dev, "Failed to create gpio entry\n");
+			err = PTR_ERR(entry);
+			goto err_fail_entry;
+		}
+	}
+
+	dev_info(&pdev->dev, "ready\n");
+
+	return 0;
+err_fail_entry:
+	device_remove_file(&pdev->dev, &dev_attr_status);
+err_no_sysfs:
+err_no_mem:
+	return err;
+}
+
+static int gpio_of_helper_remove(struct platform_device *pdev)
+{
+	struct gpio_of_helper_info *info = platform_get_drvdata(pdev);
+	struct gpio_of_entry *entry;
+	int id;
+
+	dev_info(&pdev->dev, "removing\n");
+
+	device_remove_file(&pdev->dev, &dev_attr_status);
+
+	id = 0;
+	idr_for_each_entry(&info->idr, entry, id) {
+		/* destroy each and every one */
+		gpio_of_entry_destroy(entry);
+	}
+
+	return 0;
+}
+
+#ifdef CONFIG_PM
+//#ifdef CONFIG_PM_RUNTIME
+static int gpio_of_helper_runtime_suspend(struct device *dev)
+{
+	/* place holder */
+	return 0;
+}
+
+static int gpio_of_helper_runtime_resume(struct device *dev)
+{
+	/* place holder */
+	return 0;
+}
+//#endif /* CONFIG_PM_RUNTIME */
+
+static struct dev_pm_ops gpio_of_helper_pm_ops = {
+	SET_RUNTIME_PM_OPS(gpio_of_helper_runtime_suspend,
+			   gpio_of_helper_runtime_resume, NULL)
+};
+#define GPIO_OF_HELPER_PM_OPS (&gpio_of_helper_pm_ops)
+#else
+#define GPIO_OF_HELPER_PM_OPS NULL
+#endif /* CONFIG_PM */
+
+struct platform_driver gpio_of_helper_driver = {
+	.probe		= gpio_of_helper_probe,
+	.remove		= gpio_of_helper_remove,
+	.driver = {
+		.name		= "gpio-of-helper",
+		.owner		= THIS_MODULE,
+		.pm		= GPIO_OF_HELPER_PM_OPS,
+		.of_match_table	= gpio_of_helper_of_match,
+	},
+};
+
+module_platform_driver(gpio_of_helper_driver);
+
+MODULE_AUTHOR("Pantelis Antoniou <panto@antoniou-consulting.com>");
+MODULE_DESCRIPTION("GPIO OF Helper driver");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:gpio-of-helper");
diff --git a/drivers/gpio/gpiolib-sysfs.c b/drivers/gpio/gpiolib-sysfs.c
index 4b44dd9..9c6de28 100644
--- a/drivers/gpio/gpiolib-sysfs.c
+++ b/drivers/gpio/gpiolib-sysfs.c
@@ -35,10 +35,10 @@ static DEFINE_MUTEX(sysfs_lock);
 /*
  * /sys/class/gpio/gpioN... only for GPIOs that are exported
  *   /direction
- *      * MAY BE OMITTED if kernel won't allow direction changes
  *      * is read/write as "in" or "out"
  *      * may also be written as "high" or "low", initializing
  *        output value as specified ("out" implies "low")
+ *      * read-only if kernel won't allow direction changes
  *   /value
  *      * always readable, subject to hardware behavior
  *      * may be writable, as zero/nonzero
@@ -51,6 +51,8 @@ static DEFINE_MUTEX(sysfs_lock);
  *      * is read/write as zero/nonzero
  *      * also affects existing and subsequent "falling" and "rising"
  *        /edge configuration
+ *   /label
+ *      * descriptor label
  */
 
 static ssize_t direction_show(struct device *dev,
@@ -81,7 +83,9 @@ static ssize_t direction_store(struct device *dev,
 
 	mutex_lock(&data->mutex);
 
-	if (sysfs_streq(buf, "high"))
+	if (!data->direction_can_change)
+		status = -EPERM;
+	else if (sysfs_streq(buf, "high"))
 		status = gpiod_direction_output_raw(desc, 1);
 	else if (sysfs_streq(buf, "out") || sysfs_streq(buf, "low"))
 		status = gpiod_direction_output_raw(desc, 0);
@@ -350,6 +354,23 @@ static ssize_t active_low_store(struct device *dev,
 }
 static DEVICE_ATTR_RW(active_low);
 
+static ssize_t label_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct gpiod_data *data = dev_get_drvdata(dev);
+	struct gpio_desc *desc = data->desc;
+	ssize_t			status;
+
+	mutex_lock(&data->mutex);
+
+	status = sprintf(buf, "%s\n", desc->label);
+
+	mutex_unlock(&data->mutex);
+
+	return status;
+}
+static DEVICE_ATTR_RO(label);
+
 static umode_t gpio_is_visible(struct kobject *kobj, struct attribute *attr,
 			       int n)
 {
@@ -361,12 +382,15 @@ static umode_t gpio_is_visible(struct kobject *kobj, struct attribute *attr,
 
 	if (attr == &dev_attr_direction.attr) {
 		if (!show_direction)
-			mode = 0;
+			mode &= 0444;
 	} else if (attr == &dev_attr_edge.attr) {
 		if (gpiod_to_irq(desc) < 0)
 			mode = 0;
 		if (!show_direction && test_bit(FLAG_IS_OUT, &desc->flags))
 			mode = 0;
+	} else if (attr == &dev_attr_value.attr) {
+		if (!show_direction && !test_bit(FLAG_IS_OUT, &desc->flags))
+			mode &= 0444;
 	}
 
 	return mode;
@@ -377,6 +401,7 @@ static struct attribute *gpio_attrs[] = {
 	&dev_attr_edge.attr,
 	&dev_attr_value.attr,
 	&dev_attr_active_low.attr,
+	&dev_attr_label.attr,
 	NULL,
 };
 
@@ -390,6 +415,10 @@ static const struct attribute_group *gpio_groups[] = {
 	NULL
 };
 
+/* bwlegh, a second device in the same file... get out of my namespace! */
+#define dev_attr_label dev_attr_chip_label
+#define label_show chip_label_show
+
 /*
  * /sys/class/gpio/gpiochipN/
  *   /base ... matching gpio_chip.base (N)
diff --git a/drivers/gpu/drm/etnaviv/Kconfig b/drivers/gpu/drm/etnaviv/Kconfig
index 2cde7a5..b776f41 100644
--- a/drivers/gpu/drm/etnaviv/Kconfig
+++ b/drivers/gpu/drm/etnaviv/Kconfig
@@ -2,7 +2,7 @@
 config DRM_ETNAVIV
 	tristate "ETNAVIV (DRM support for Vivante GPU IP cores)"
 	depends on DRM
-	depends on ARCH_MXC || ARCH_DOVE
+	depends on ARCH_MXC || ARCH_DOVE || ARCH_OMAP2PLUS
 	select SHMEM
 	select TMPFS
 	select IOMMU_API
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.c b/drivers/gpu/drm/etnaviv/etnaviv_drv.c
index aa68766..20d20ca 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_drv.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.c
@@ -654,6 +654,7 @@ static int etnaviv_pdev_remove(struct platform_device *pdev)
 static const struct of_device_id dt_match[] = {
 	{ .compatible = "fsl,imx-gpu-subsystem" },
 	{ .compatible = "marvell,dove-gpu-subsystem" },
+	{ .compatible = "vivante,gc-gpu-subsystem"},
 	{}
 };
 MODULE_DEVICE_TABLE(of, dt_match);
diff --git a/drivers/gpu/drm/i2c/Kconfig b/drivers/gpu/drm/i2c/Kconfig
index a6c92be..66ff8e3 100644
--- a/drivers/gpu/drm/i2c/Kconfig
+++ b/drivers/gpu/drm/i2c/Kconfig
@@ -1,6 +1,12 @@
 menu "I2C encoder or helper chips"
      depends on DRM && DRM_KMS_HELPER && I2C
 
+config DRM_I2C_ADIHDMI
+	tristate "ADI HDMI encoder"
+	default m if DRM_TILCDC
+	help
+	  Support for ADI HDMI encoder.
+
 config DRM_I2C_CH7006
 	tristate "Chrontel ch7006 TV encoder"
 	default m if DRM_NOUVEAU
diff --git a/drivers/gpu/drm/i2c/Makefile b/drivers/gpu/drm/i2c/Makefile
index 43aa33b..62a4eea 100644
--- a/drivers/gpu/drm/i2c/Makefile
+++ b/drivers/gpu/drm/i2c/Makefile
@@ -8,3 +8,6 @@ obj-$(CONFIG_DRM_I2C_SIL164) += sil164.o
 
 tda998x-y := tda998x_drv.o
 obj-$(CONFIG_DRM_I2C_NXP_TDA998X) += tda998x.o
+
+adihdmi-y := adihdmi_drv.o
+obj-$(CONFIG_DRM_I2C_ADIHDMI) += adihdmi.o
diff --git b/drivers/gpu/drm/i2c/adihdmi.h b/drivers/gpu/drm/i2c/adihdmi.h
new file mode 100644
index 0000000..58d9a2b
--- /dev/null
+++ b/drivers/gpu/drm/i2c/adihdmi.h
@@ -0,0 +1,289 @@
+/*
+ * Analog Devices ADIHDMI HDMI transmitter driver
+ *
+ * Copyright 2012 Analog Devices Inc.
+ *
+ * Licensed under the GPL-2.
+ */
+
+#ifndef __DRM_I2C_ADIHDMI_H__
+#define __DRM_I2C_ADIHDMI_H__
+
+#include <linux/hdmi.h>
+
+#define ADIHDMI_REG_CHIP_REVISION		0x00
+#define ADIHDMI_REG_N0				0x01
+#define ADIHDMI_REG_N1				0x02
+#define ADIHDMI_REG_N2				0x03
+#define ADIHDMI_REG_SPDIF_FREQ			0x04
+#define ADIHDMI_REG_CTS_AUTOMATIC1		0x05
+#define ADIHDMI_REG_CTS_AUTOMATIC2		0x06
+#define ADIHDMI_REG_CTS_MANUAL0			0x07
+#define ADIHDMI_REG_CTS_MANUAL1			0x08
+#define ADIHDMI_REG_CTS_MANUAL2			0x09
+#define ADIHDMI_REG_AUDIO_SOURCE		0x0a
+#define ADIHDMI_REG_AUDIO_CONFIG		0x0b
+#define ADIHDMI_REG_I2S_CONFIG			0x0c
+#define ADIHDMI_REG_I2S_WIDTH			0x0d
+#define ADIHDMI_REG_AUDIO_SUB_SRC0		0x0e
+#define ADIHDMI_REG_AUDIO_SUB_SRC1		0x0f
+#define ADIHDMI_REG_AUDIO_SUB_SRC2		0x10
+#define ADIHDMI_REG_AUDIO_SUB_SRC3		0x11
+#define ADIHDMI_REG_AUDIO_CFG1			0x12
+#define ADIHDMI_REG_AUDIO_CFG2			0x13
+#define ADIHDMI_REG_AUDIO_CFG3			0x14
+#define ADIHDMI_REG_I2C_FREQ_ID_CFG		0x15
+#define ADIHDMI_REG_VIDEO_INPUT_CFG1		0x16
+#define ADIHDMI_REG_CSC_UPPER(x)		(0x18 + (x) * 2)
+#define ADIHDMI_REG_CSC_LOWER(x)		(0x19 + (x) * 2)
+#define ADIHDMI_REG_SYNC_DECODER(x)		(0x30 + (x))
+#define ADIHDMI_REG_DE_GENERATOR		(0x35 + (x))
+#define ADIHDMI_REG_PIXEL_REPETITION		0x3b
+#define ADIHDMI_REG_VIC_MANUAL			0x3c
+#define ADIHDMI_REG_VIC_SEND			0x3d
+#define ADIHDMI_REG_VIC_DETECTED		0x3e
+#define ADIHDMI_REG_AUX_VIC_DETECTED		0x3f
+#define ADIHDMI_REG_PACKET_ENABLE0		0x40
+#define ADIHDMI_REG_POWER			0x41
+#define ADIHDMI_REG_STATUS			0x42
+#define ADIHDMI_REG_EDID_I2C_ADDR		0x43
+#define ADIHDMI_REG_PACKET_ENABLE1		0x44
+#define ADIHDMI_REG_PACKET_I2C_ADDR		0x45
+#define ADIHDMI_REG_DSD_ENABLE			0x46
+#define ADIHDMI_REG_VIDEO_INPUT_CFG2		0x48
+#define ADIHDMI_REG_INFOFRAME_UPDATE		0x4a
+#define ADIHDMI_REG_GC(x)			(0x4b + (x)) /* 0x4b - 0x51 */
+#define ADIHDMI_REG_AVI_INFOFRAME_VERSION	0x52
+#define ADIHDMI_REG_AVI_INFOFRAME_LENGTH	0x53
+#define ADIHDMI_REG_AVI_INFOFRAME_CHECKSUM	0x54
+#define ADIHDMI_REG_AVI_INFOFRAME(x)		(0x55 + (x)) /* 0x55 - 0x6f */
+#define ADIHDMI_REG_AUDIO_INFOFRAME_VERSION	0x70
+#define ADIHDMI_REG_AUDIO_INFOFRAME_LENGTH	0x71
+#define ADIHDMI_REG_AUDIO_INFOFRAME_CHECKSUM	0x72
+#define ADIHDMI_REG_AUDIO_INFOFRAME(x)		(0x73 + (x)) /* 0x73 - 0x7c */
+#define ADIHDMI_REG_INT_ENABLE(x)		(0x94 + (x))
+#define ADIHDMI_REG_INT(x)			(0x96 + (x))
+#define ADIHDMI_REG_INPUT_CLK_DIV		0x9d
+#define ADIHDMI_REG_PLL_STATUS			0x9e
+#define ADIHDMI_REG_HDMI_POWER			0xa1
+#define ADIHDMI_REG_HDCP_HDMI_CFG		0xaf
+#define ADIHDMI_REG_AN(x)			(0xb0 + (x)) /* 0xb0 - 0xb7 */
+#define ADIHDMI_REG_HDCP_STATUS			0xb8
+#define ADIHDMI_REG_BCAPS			0xbe
+#define ADIHDMI_REG_BKSV(x)			(0xc0 + (x)) /* 0xc0 - 0xc3 */
+#define ADIHDMI_REG_EDID_SEGMENT		0xc4
+#define ADIHDMI_REG_DDC_STATUS			0xc8
+#define ADIHDMI_REG_EDID_READ_CTRL		0xc9
+#define ADIHDMI_REG_BSTATUS(x)			(0xca + (x)) /* 0xca - 0xcb */
+#define ADIHDMI_REG_TIMING_GEN_SEQ		0xd0
+#define ADIHDMI_REG_POWER2			0xd6
+#define ADIHDMI_REG_HSYNC_PLACEMENT_MSB		0xfa
+
+#define ADIHDMI_REG_SYNC_ADJUSTMENT(x)		(0xd7 + (x)) /* 0xd7 - 0xdc */
+#define ADIHDMI_REG_TMDS_CLOCK_INV		0xde
+#define ADIHDMI_REG_ARC_CTRL			0xdf
+#define ADIHDMI_REG_CEC_I2C_ADDR		0xe1
+#define ADIHDMI_REG_CEC_CTRL			0xe2
+#define ADIHDMI_REG_CHIP_ID_HIGH		0xf5
+#define ADIHDMI_REG_CHIP_ID_LOW			0xf6
+
+#define ADIHDMI_CSC_ENABLE			BIT(7)
+#define ADIHDMI_CSC_UPDATE_MODE			BIT(5)
+
+#define ADIHDMI_INT0_HDP			BIT(7)
+#define ADIHDMI_INT0_VSYNC			BIT(5)
+#define ADIHDMI_INT0_AUDIO_FIFO_FULL		BIT(4)
+#define ADIHDMI_INT0_EDID_READY			BIT(2)
+#define ADIHDMI_INT0_HDCP_AUTHENTICATED		BIT(1)
+
+#define ADIHDMI_INT1_DDC_ERROR			BIT(7)
+#define ADIHDMI_INT1_BKSV			BIT(6)
+#define ADIHDMI_INT1_CEC_TX_READY		BIT(5)
+#define ADIHDMI_INT1_CEC_TX_ARBIT_LOST		BIT(4)
+#define ADIHDMI_INT1_CEC_TX_RETRY_TIMEOUT	BIT(3)
+#define ADIHDMI_INT1_CEC_RX_READY3		BIT(2)
+#define ADIHDMI_INT1_CEC_RX_READY2		BIT(1)
+#define ADIHDMI_INT1_CEC_RX_READY1		BIT(0)
+
+#define ADIHDMI_ARC_CTRL_POWER_DOWN		BIT(0)
+
+#define ADIHDMI_CEC_CTRL_POWER_DOWN		BIT(0)
+
+#define ADIHDMI_POWER_POWER_DOWN		BIT(6)
+
+#define ADIHDMI_HDMI_CFG_MODE_MASK		0x2
+#define ADIHDMI_HDMI_CFG_MODE_DVI		0x0
+#define ADIHDMI_HDMI_CFG_MODE_HDMI		0x2
+
+#define ADIHDMI_AUDIO_SELECT_I2C		0x0
+#define ADIHDMI_AUDIO_SELECT_SPDIF		0x1
+#define ADIHDMI_AUDIO_SELECT_DSD		0x2
+#define ADIHDMI_AUDIO_SELECT_HBR		0x3
+#define ADIHDMI_AUDIO_SELECT_DST		0x4
+
+#define ADIHDMI_I2S_SAMPLE_LEN_16		0x2
+#define ADIHDMI_I2S_SAMPLE_LEN_20		0x3
+#define ADIHDMI_I2S_SAMPLE_LEN_18		0x4
+#define ADIHDMI_I2S_SAMPLE_LEN_22		0x5
+#define ADIHDMI_I2S_SAMPLE_LEN_19		0x8
+#define ADIHDMI_I2S_SAMPLE_LEN_23		0x9
+#define ADIHDMI_I2S_SAMPLE_LEN_24		0xb
+#define ADIHDMI_I2S_SAMPLE_LEN_17		0xc
+#define ADIHDMI_I2S_SAMPLE_LEN_21		0xd
+
+#define ADIHDMI_SAMPLE_FREQ_44100		0x0
+#define ADIHDMI_SAMPLE_FREQ_48000		0x2
+#define ADIHDMI_SAMPLE_FREQ_32000		0x3
+#define ADIHDMI_SAMPLE_FREQ_88200		0x8
+#define ADIHDMI_SAMPLE_FREQ_96000		0xa
+#define ADIHDMI_SAMPLE_FREQ_176400		0xc
+#define ADIHDMI_SAMPLE_FREQ_192000		0xe
+
+#define ADIHDMI_STATUS_POWER_DOWN_POLARITY	BIT(7)
+#define ADIHDMI_STATUS_HPD			BIT(6)
+#define ADIHDMI_STATUS_MONITOR_SENSE		BIT(5)
+#define ADIHDMI_STATUS_I2S_32BIT_MODE		BIT(3)
+
+#define ADIHDMI_PACKET_ENABLE_N_CTS		BIT(8+6)
+#define ADIHDMI_PACKET_ENABLE_AUDIO_SAMPLE	BIT(8+5)
+#define ADIHDMI_PACKET_ENABLE_AVI_INFOFRAME	BIT(8+4)
+#define ADIHDMI_PACKET_ENABLE_AUDIO_INFOFRAME	BIT(8+3)
+#define ADIHDMI_PACKET_ENABLE_GC		BIT(7)
+#define ADIHDMI_PACKET_ENABLE_SPD		BIT(6)
+#define ADIHDMI_PACKET_ENABLE_MPEG		BIT(5)
+#define ADIHDMI_PACKET_ENABLE_ACP		BIT(4)
+#define ADIHDMI_PACKET_ENABLE_ISRC		BIT(3)
+#define ADIHDMI_PACKET_ENABLE_GM		BIT(2)
+#define ADIHDMI_PACKET_ENABLE_SPARE2		BIT(1)
+#define ADIHDMI_PACKET_ENABLE_SPARE1		BIT(0)
+
+#define ADIHDMI_REG_POWER2_HDP_SRC_MASK		0xc0
+#define ADIHDMI_REG_POWER2_HDP_SRC_BOTH		0x00
+#define ADIHDMI_REG_POWER2_HDP_SRC_HDP		0x40
+#define ADIHDMI_REG_POWER2_HDP_SRC_CEC		0x80
+#define ADIHDMI_REG_POWER2_HDP_SRC_NONE		0xc0
+#define ADIHDMI_REG_POWER2_TDMS_ENABLE		BIT(4)
+#define ADIHDMI_REG_POWER2_GATE_INPUT_CLK	BIT(0)
+
+#define ADIHDMI_LOW_REFRESH_RATE_NONE		0x0
+#define ADIHDMI_LOW_REFRESH_RATE_24HZ		0x1
+#define ADIHDMI_LOW_REFRESH_RATE_25HZ		0x2
+#define ADIHDMI_LOW_REFRESH_RATE_30HZ		0x3
+
+#define ADIHDMI_AUDIO_CFG3_LEN_MASK		0x0f
+#define ADIHDMI_I2C_FREQ_ID_CFG_RATE_MASK	0xf0
+
+#define ADIHDMI_AUDIO_SOURCE_I2S		0
+#define ADIHDMI_AUDIO_SOURCE_SPDIF		1
+
+#define ADIHDMI_I2S_FORMAT_I2S			0
+#define ADIHDMI_I2S_FORMAT_RIGHT_J		1
+#define ADIHDMI_I2S_FORMAT_LEFT_J		2
+
+#define ADIHDMI_PACKET(p, x)	    ((p) * 0x20 + (x))
+#define ADIHDMI_PACKET_SDP(x)	    ADIHDMI_PACKET(0, x)
+#define ADIHDMI_PACKET_MPEG(x)	    ADIHDMI_PACKET(1, x)
+#define ADIHDMI_PACKET_ACP(x)	    ADIHDMI_PACKET(2, x)
+#define ADIHDMI_PACKET_ISRC1(x)	    ADIHDMI_PACKET(3, x)
+#define ADIHDMI_PACKET_ISRC2(x)	    ADIHDMI_PACKET(4, x)
+#define ADIHDMI_PACKET_GM(x)	    ADIHDMI_PACKET(5, x)
+#define ADIHDMI_PACKET_SPARE(x)	    ADIHDMI_PACKET(6, x)
+
+enum adihdmi_input_clock {
+	ADIHDMI_INPUT_CLOCK_1X,
+	ADIHDMI_INPUT_CLOCK_2X,
+	ADIHDMI_INPUT_CLOCK_DDR,
+};
+
+enum adihdmi_input_justification {
+	ADIHDMI_INPUT_JUSTIFICATION_EVENLY = 0,
+	ADIHDMI_INPUT_JUSTIFICATION_RIGHT = 1,
+	ADIHDMI_INPUT_JUSTIFICATION_LEFT = 2,
+};
+
+enum adihdmi_input_sync_pulse {
+	ADIHDMI_INPUT_SYNC_PULSE_DE = 0,
+	ADIHDMI_INPUT_SYNC_PULSE_HSYNC = 1,
+	ADIHDMI_INPUT_SYNC_PULSE_VSYNC = 2,
+	ADIHDMI_INPUT_SYNC_PULSE_NONE = 3,
+};
+
+/**
+ * enum adihdmi_sync_polarity - Polarity for the input sync signals
+ * @ADIHDMI_SYNC_POLARITY_PASSTHROUGH:  Sync polarity matches that of
+ *				       the currently configured mode.
+ * @ADIHDMI_SYNC_POLARITY_LOW:	    Sync polarity is low
+ * @ADIHDMI_SYNC_POLARITY_HIGH:	    Sync polarity is high
+ *
+ * If the polarity is set to either LOW or HIGH the driver will configure the
+ * ADIHDMI to internally invert the sync signal if required to match the sync
+ * polarity setting for the currently selected output mode.
+ *
+ * If the polarity is set to PASSTHROUGH, the ADIHDMI will route the signal
+ * unchanged. This is used when the upstream graphics core already generates
+ * the sync signals with the correct polarity.
+ */
+enum adihdmi_sync_polarity {
+	ADIHDMI_SYNC_POLARITY_PASSTHROUGH,
+	ADIHDMI_SYNC_POLARITY_LOW,
+	ADIHDMI_SYNC_POLARITY_HIGH,
+};
+
+/**
+ * struct adihdmi_link_config - Describes adihdmi hardware configuration
+ * @input_color_depth:		Number of bits per color component (8, 10 or 12)
+ * @input_colorspace:		The input colorspace (RGB, YUV444, YUV422)
+ * @input_clock:		The input video clock style (1x, 2x, DDR)
+ * @input_style:		The input component arrangement variant
+ * @input_justification:	Video input format bit justification
+ * @clock_delay:		Clock delay for the input clock (in ps)
+ * @embedded_sync:		Video input uses BT.656-style embedded sync
+ * @sync_pulse:			Select the sync pulse
+ * @vsync_polarity:		vsync input signal configuration
+ * @hsync_polarity:		hsync input signal configuration
+ */
+struct adihdmi_link_config {
+	unsigned int input_color_depth;
+	enum hdmi_colorspace input_colorspace;
+	enum adihdmi_input_clock input_clock;
+	unsigned int input_style;
+	enum adihdmi_input_justification input_justification;
+
+	int clock_delay;
+
+	bool embedded_sync;
+	enum adihdmi_input_sync_pulse sync_pulse;
+	enum adihdmi_sync_polarity vsync_polarity;
+	enum adihdmi_sync_polarity hsync_polarity;
+};
+
+/**
+ * enum adihdmi_csc_scaling - Scaling factor for the ADIHDMI CSC
+ * @ADIHDMI_CSC_SCALING_1: CSC results are not scaled
+ * @ADIHDMI_CSC_SCALING_2: CSC results are scaled by a factor of two
+ * @ADIHDMI_CSC_SCALING_4: CSC results are scalled by a factor of four
+ */
+enum adihdmi_csc_scaling {
+	ADIHDMI_CSC_SCALING_1 = 0,
+	ADIHDMI_CSC_SCALING_2 = 1,
+	ADIHDMI_CSC_SCALING_4 = 2,
+};
+
+/**
+ * struct adihdmi_video_config - Describes adihdmi hardware configuration
+ * @csc_enable:			Whether to enable color space conversion
+ * @csc_scaling_factor:		Color space conversion scaling factor
+ * @csc_coefficents:		Color space conversion coefficents
+ * @hdmi_mode:			Whether to use HDMI or DVI output mode
+ * @avi_infoframe:		HDMI infoframe
+ */
+struct adihdmi_video_config {
+	bool csc_enable;
+	enum adihdmi_csc_scaling csc_scaling_factor;
+	const uint16_t *csc_coefficents;
+
+	bool hdmi_mode;
+	struct hdmi_avi_infoframe avi_infoframe;
+};
+
+#endif /* __DRM_I2C_ADIHDMI_H__ */
diff --git b/drivers/gpu/drm/i2c/adihdmi_drv.c b/drivers/gpu/drm/i2c/adihdmi_drv.c
new file mode 100644
index 0000000..6792224
--- /dev/null
+++ b/drivers/gpu/drm/i2c/adihdmi_drv.c
@@ -0,0 +1,1268 @@
+/*
+ * Analog Devices ADIHDMI HDMI transmitter driver
+ *
+ * Copyright 2012 Analog Devices Inc.
+ * Copyright 2015 Konsulko Group
+ *
+ * Licensed under the GPL-2.
+ */
+
+#include <linux/component.h>
+#include <linux/device.h>
+#include <linux/gpio/consumer.h>
+#include <linux/i2c.h>
+#include <linux/module.h>
+#include <linux/regmap.h>
+#include <linux/slab.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+#include <drm/drm_edid.h>
+#include <drm/drm_encoder_slave.h>
+#include <drm_of.h>
+
+#include "adihdmi.h"
+
+#define ADIHDMI_INFOFRAME_PACKETS   (0x7900)
+
+struct adihdmi {
+	struct i2c_client *i2c_main;
+	struct i2c_client *i2c_edid;
+
+	struct regmap *regmap;
+	struct regmap *packet_memory_regmap;
+	enum drm_connector_status status;
+	bool powered;
+
+	unsigned int f_tmds;
+
+	unsigned int current_edid_segment;
+	uint8_t edid_buf[256];
+	bool edid_read;
+
+	wait_queue_head_t wq;
+	struct drm_encoder *encoder;
+
+	bool embedded_sync;
+	enum adihdmi_sync_polarity vsync_polarity;
+	enum adihdmi_sync_polarity hsync_polarity;
+	bool rgb;
+
+	struct edid *edid;
+
+	struct gpio_desc *gpio_pd;
+};
+
+struct adihdmi2 {
+	struct adihdmi base;
+	struct drm_encoder encoder;
+	struct drm_connector connector;
+};
+
+/* ADI recommended values for proper operation. */
+static const struct reg_sequence adihdmi_fixed_registers[] = {
+	{ 0x98, 0x03 },
+	{ 0x9a, 0xe0 },
+	{ 0x9c, 0x30 },
+	{ 0x9d, 0x61 },
+	{ 0xa2, 0xa4 },
+	{ 0xa3, 0xa4 },
+	{ 0xe0, 0xd0 },
+	{ 0xf9, 0x00 },
+	{ 0x55, 0x02 },
+};
+
+/* -----------------------------------------------------------------------------
+ * Register access
+ */
+
+static const uint8_t adihdmi_register_defaults[] = {
+	0x12, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 00 */
+	0x00, 0x00, 0x01, 0x0e, 0xbc, 0x18, 0x01, 0x13,
+	0x25, 0x37, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 10 */
+	0x46, 0x62, 0x04, 0xa8, 0x00, 0x00, 0x1c, 0x84,
+	0x1c, 0xbf, 0x04, 0xa8, 0x1e, 0x70, 0x02, 0x1e, /* 20 */
+	0x00, 0x00, 0x04, 0xa8, 0x08, 0x12, 0x1b, 0xac,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 30 */
+	0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0xb0,
+	0x00, 0x50, 0x90, 0x7e, 0x79, 0x70, 0x00, 0x00, /* 40 */
+	0x00, 0xa8, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x02, 0x0d, 0x00, 0x00, 0x00, 0x00, /* 50 */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 60 */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x01, 0x0a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 70 */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 80 */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0xc0, 0x00, 0x00, 0x00, /* 90 */
+	0x0b, 0x02, 0x00, 0x18, 0x5a, 0x60, 0x00, 0x00,
+	0x00, 0x00, 0x80, 0x80, 0x08, 0x04, 0x00, 0x00, /* a0 */
+	0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x40, 0x14,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* b0 */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* c0 */
+	0x00, 0x03, 0x00, 0x00, 0x02, 0x00, 0x01, 0x04,
+	0x30, 0xff, 0x80, 0x80, 0x80, 0x00, 0x00, 0x00, /* d0 */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x01,
+	0x80, 0x75, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, /* e0 */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x75, 0x11, 0x00, /* f0 */
+	0x00, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+};
+
+static bool adihdmi_register_volatile(struct device *dev, unsigned int reg)
+{
+	switch (reg) {
+		case ADIHDMI_REG_CHIP_REVISION:
+		case ADIHDMI_REG_SPDIF_FREQ:
+		case ADIHDMI_REG_CTS_AUTOMATIC1:
+		case ADIHDMI_REG_CTS_AUTOMATIC2:
+		case ADIHDMI_REG_VIC_DETECTED:
+		case ADIHDMI_REG_VIC_SEND:
+		case ADIHDMI_REG_AUX_VIC_DETECTED:
+		case ADIHDMI_REG_STATUS:
+		case ADIHDMI_REG_GC(1):
+		case ADIHDMI_REG_INT(0):
+		case ADIHDMI_REG_INT(1):
+		case ADIHDMI_REG_PLL_STATUS:
+		case ADIHDMI_REG_AN(0):
+		case ADIHDMI_REG_AN(1):
+		case ADIHDMI_REG_AN(2):
+		case ADIHDMI_REG_AN(3):
+		case ADIHDMI_REG_AN(4):
+		case ADIHDMI_REG_AN(5):
+		case ADIHDMI_REG_AN(6):
+		case ADIHDMI_REG_AN(7):
+		case ADIHDMI_REG_HDCP_STATUS:
+		case ADIHDMI_REG_BCAPS:
+		case ADIHDMI_REG_BKSV(0):
+		case ADIHDMI_REG_BKSV(1):
+		case ADIHDMI_REG_BKSV(2):
+		case ADIHDMI_REG_BKSV(3):
+		case ADIHDMI_REG_BKSV(4):
+		case ADIHDMI_REG_DDC_STATUS:
+		case ADIHDMI_REG_BSTATUS(0):
+		case ADIHDMI_REG_BSTATUS(1):
+		case ADIHDMI_REG_CHIP_ID_HIGH:
+		case ADIHDMI_REG_CHIP_ID_LOW:
+			return true;
+	}
+
+	return false;
+}
+
+static const struct regmap_config adihdmi_regmap_config = {
+	.reg_bits = 8,
+	.val_bits = 8,
+
+	.max_register = 0xff,
+	.cache_type = REGCACHE_RBTREE,
+	.reg_defaults_raw = adihdmi_register_defaults,
+	.num_reg_defaults_raw = ARRAY_SIZE(adihdmi_register_defaults),
+
+	.volatile_reg = adihdmi_register_volatile,
+};
+
+/* -----------------------------------------------------------------------------
+ * Hardware configuration
+ */
+
+	static void adihdmi_audio_setup(struct adihdmi * adihdmi)
+{
+	/* Select I2S. */
+	regmap_write(adihdmi->regmap, ADIHDMI_REG_AUDIO_SOURCE, 0x01);
+	regmap_write(adihdmi->regmap, ADIHDMI_REG_I2S_CONFIG, 0x84);
+
+	/* Setup clocks for 48KHz. */
+	regmap_write(adihdmi->regmap, ADIHDMI_REG_N0, 0x00);
+	regmap_write(adihdmi->regmap, ADIHDMI_REG_N1, 0x18);
+	regmap_write(adihdmi->regmap, ADIHDMI_REG_N2, 0x00);
+	regmap_update_bits(adihdmi->regmap, ADIHDMI_REG_I2C_FREQ_ID_CFG, 0xF0, 0x20);
+
+	/* Set audio word length to 24 bits. */
+	regmap_update_bits(adihdmi->regmap, ADIHDMI_REG_AUDIO_CFG3, 0x0F, 0x0B);
+
+	/* Update audio infoframe. */
+	regmap_update_bits(adihdmi->regmap, ADIHDMI_REG_INFOFRAME_UPDATE, 0x20, 0x20);
+	regmap_update_bits(adihdmi->regmap, ADIHDMI_REG_AUDIO_INFOFRAME(0), 0x07, 0x01);
+	regmap_update_bits(adihdmi->regmap, ADIHDMI_REG_AUDIO_INFOFRAME(3), 0x1F, 0x00);
+	regmap_update_bits(adihdmi->regmap, ADIHDMI_REG_INFOFRAME_UPDATE, 0x20, 0x00);
+}
+
+static void adihdmi_set_colormap(struct adihdmi *adihdmi, bool enable,
+		const uint16_t *coeff,
+		unsigned int scaling_factor)
+{
+	unsigned int i;
+
+	regmap_update_bits(adihdmi->regmap, ADIHDMI_REG_CSC_UPPER(1),
+			ADIHDMI_CSC_UPDATE_MODE, ADIHDMI_CSC_UPDATE_MODE);
+
+	if (enable) {
+		for (i = 0; i < 12; ++i) {
+			regmap_update_bits(adihdmi->regmap,
+					ADIHDMI_REG_CSC_UPPER(i),
+					0x1f, coeff[i] >> 8);
+			regmap_write(adihdmi->regmap,
+					ADIHDMI_REG_CSC_LOWER(i),
+					coeff[i] & 0xff);
+		}
+	}
+
+	if (enable)
+		regmap_update_bits(adihdmi->regmap, ADIHDMI_REG_CSC_UPPER(0),
+				0xe0, 0x80 | (scaling_factor << 5));
+	else
+		regmap_update_bits(adihdmi->regmap, ADIHDMI_REG_CSC_UPPER(0),
+				0x80, 0x00);
+
+	regmap_update_bits(adihdmi->regmap, ADIHDMI_REG_CSC_UPPER(1),
+			ADIHDMI_CSC_UPDATE_MODE, 0);
+}
+
+static int adihdmi_packet_enable(struct adihdmi *adihdmi, unsigned int packet)
+{
+	if (packet & 0xff)
+		regmap_update_bits(adihdmi->regmap, ADIHDMI_REG_PACKET_ENABLE0,
+				packet, 0xff);
+
+	if (packet & 0xff00) {
+		packet >>= 8;
+		regmap_update_bits(adihdmi->regmap, ADIHDMI_REG_PACKET_ENABLE1,
+				packet, 0xff);
+	}
+
+	return 0;
+}
+
+static int adihdmi_packet_disable(struct adihdmi *adihdmi, unsigned int packet)
+{
+	if (packet & 0xff)
+		regmap_update_bits(adihdmi->regmap, ADIHDMI_REG_PACKET_ENABLE0,
+				packet, 0x00);
+
+	if (packet & 0xff00) {
+		packet >>= 8;
+		regmap_update_bits(adihdmi->regmap, ADIHDMI_REG_PACKET_ENABLE1,
+				packet, 0x00);
+	}
+
+	return 0;
+}
+
+/* Coefficients for adihdmi color space conversion */
+static const uint16_t adihdmi_csc_ycbcr_to_rgb[] = {
+	0x0734, 0x04ad, 0x0000, 0x1c1b,
+	0x1ddc, 0x04ad, 0x1f24, 0x0135,
+	0x0000, 0x04ad, 0x087c, 0x1b77,
+};
+
+static void adihdmi_set_config_csc(struct adihdmi *adihdmi,
+		struct drm_connector *connector,
+		bool rgb)
+{
+	struct adihdmi_video_config config;
+	bool output_format_422, output_format_ycbcr;
+	unsigned int mode;
+	uint8_t infoframe[17];
+
+	if (adihdmi->edid)
+		config.hdmi_mode = drm_detect_hdmi_monitor(adihdmi->edid);
+	else
+		config.hdmi_mode = false;
+
+	hdmi_avi_infoframe_init(&config.avi_infoframe);
+
+	config.avi_infoframe.scan_mode = HDMI_SCAN_MODE_UNDERSCAN;
+
+	if (rgb) {
+		config.csc_enable = false;
+		config.avi_infoframe.colorspace = HDMI_COLORSPACE_RGB;
+	} else {
+		config.csc_scaling_factor = ADIHDMI_CSC_SCALING_4;
+		config.csc_coefficents = adihdmi_csc_ycbcr_to_rgb;
+
+		if ((connector->display_info.color_formats &
+					DRM_COLOR_FORMAT_YCRCB422) &&
+				config.hdmi_mode) {
+			config.csc_enable = false;
+			config.avi_infoframe.colorspace =
+				HDMI_COLORSPACE_YUV422;
+		} else {
+			config.csc_enable = true;
+			config.avi_infoframe.colorspace = HDMI_COLORSPACE_RGB;
+		}
+	}
+
+	if (config.hdmi_mode) {
+		mode = ADIHDMI_HDMI_CFG_MODE_HDMI;
+
+		switch (config.avi_infoframe.colorspace) {
+			case HDMI_COLORSPACE_YUV444:
+				output_format_422 = false;
+				output_format_ycbcr = true;
+				break;
+			case HDMI_COLORSPACE_YUV422:
+				output_format_422 = true;
+				output_format_ycbcr = true;
+				break;
+			default:
+				output_format_422 = false;
+				output_format_ycbcr = false;
+				break;
+		}
+	} else {
+		mode = ADIHDMI_HDMI_CFG_MODE_DVI;
+		output_format_422 = false;
+		output_format_ycbcr = false;
+	}
+
+	adihdmi_packet_disable(adihdmi, ADIHDMI_INFOFRAME_PACKETS);
+
+	adihdmi_set_colormap(adihdmi, config.csc_enable,
+			config.csc_coefficents,
+			config.csc_scaling_factor);
+
+	regmap_update_bits(adihdmi->regmap, ADIHDMI_REG_VIDEO_INPUT_CFG1, 0x81,
+			(output_format_422 << 7) | output_format_ycbcr);
+
+	regmap_update_bits(adihdmi->regmap, ADIHDMI_REG_HDCP_HDMI_CFG,
+			ADIHDMI_HDMI_CFG_MODE_MASK, mode);
+
+	hdmi_avi_infoframe_pack(&config.avi_infoframe, infoframe,
+			sizeof(infoframe));
+
+	/* The AVI infoframe id is not configurable */
+	regmap_bulk_write(adihdmi->regmap, ADIHDMI_REG_AVI_INFOFRAME_VERSION,
+			infoframe + 1, sizeof(infoframe) - 1);
+
+	adihdmi_packet_enable(adihdmi, ADIHDMI_INFOFRAME_PACKETS);
+}
+
+static void adihdmi_set_link_config(struct adihdmi *adihdmi,
+		const struct adihdmi_link_config *config)
+{
+	/*
+	 * The input style values documented in the datasheet don't match the
+	 * hardware register field values :-(
+	 */
+	static const unsigned int input_styles[4] = { 0, 2, 1, 3 };
+
+	unsigned int clock_delay;
+	unsigned int color_depth;
+	unsigned int input_id;
+
+	clock_delay = (config->clock_delay + 1200) / 400;
+	color_depth = config->input_color_depth == 8 ? 3
+		: (config->input_color_depth == 10 ? 1 : 2);
+
+	/* TODO Support input ID 6 */
+	if (config->input_colorspace != HDMI_COLORSPACE_YUV422)
+		input_id = config->input_clock == ADIHDMI_INPUT_CLOCK_DDR
+			? 5 : 0;
+	else if (config->input_clock == ADIHDMI_INPUT_CLOCK_DDR)
+		input_id = config->embedded_sync ? 8 : 7;
+	else if (config->input_clock == ADIHDMI_INPUT_CLOCK_2X)
+		input_id = config->embedded_sync ? 4 : 3;
+	else
+		input_id = config->embedded_sync ? 2 : 1;
+
+	regmap_update_bits(adihdmi->regmap, ADIHDMI_REG_I2C_FREQ_ID_CFG, 0xf,
+			input_id);
+	regmap_update_bits(adihdmi->regmap, ADIHDMI_REG_VIDEO_INPUT_CFG1, 0x7e,
+			(color_depth << 4) |
+			(input_styles[config->input_style] << 2));
+	regmap_write(adihdmi->regmap, ADIHDMI_REG_VIDEO_INPUT_CFG2,
+			config->input_justification << 3);
+	regmap_write(adihdmi->regmap, ADIHDMI_REG_TIMING_GEN_SEQ,
+			config->sync_pulse << 2);
+
+	regmap_write(adihdmi->regmap, 0xba, clock_delay << 5);
+
+	adihdmi->embedded_sync = config->embedded_sync;
+	adihdmi->hsync_polarity = config->hsync_polarity;
+	adihdmi->vsync_polarity = config->vsync_polarity;
+	adihdmi->rgb = config->input_colorspace == HDMI_COLORSPACE_RGB;
+}
+
+static void adihdmi_power_on(struct adihdmi *adihdmi)
+{
+	adihdmi->current_edid_segment = -1;
+
+	regmap_write(adihdmi->regmap, ADIHDMI_REG_INT(0),
+			ADIHDMI_INT0_EDID_READY);
+	regmap_write(adihdmi->regmap, ADIHDMI_REG_INT(1),
+			ADIHDMI_INT1_DDC_ERROR);
+	regmap_update_bits(adihdmi->regmap, ADIHDMI_REG_POWER,
+			ADIHDMI_POWER_POWER_DOWN, 0);
+
+	/*
+	 * Per spec it is allowed to pulse the HDP signal to indicate that the
+	 * EDID information has changed. Some monitors do this when they wakeup
+	 * from standby or are enabled. When the HDP goes low the adihdmi is
+	 * reset and the outputs are disabled which might cause the monitor to
+	 * go to standby again. To avoid this we ignore the HDP pin for the
+	 * first few seconds after enabling the output.
+	 */
+	regmap_update_bits(adihdmi->regmap, ADIHDMI_REG_POWER2,
+			ADIHDMI_REG_POWER2_HDP_SRC_MASK,
+			ADIHDMI_REG_POWER2_HDP_SRC_NONE);
+
+	/*
+	 * Most of the registers are reset during power down or when HPD is low.
+	 */
+	regcache_sync(adihdmi->regmap);
+
+	adihdmi->powered = true;
+}
+
+static void adihdmi_power_off(struct adihdmi *adihdmi)
+{
+	/* TODO: setup additional power down modes */
+	regmap_update_bits(adihdmi->regmap, ADIHDMI_REG_POWER,
+			ADIHDMI_POWER_POWER_DOWN,
+			ADIHDMI_POWER_POWER_DOWN);
+	regcache_mark_dirty(adihdmi->regmap);
+
+	adihdmi->powered = false;
+}
+
+/* -----------------------------------------------------------------------------
+ * Interrupt and hotplug detection
+ */
+
+static bool adihdmi_hpd(struct adihdmi *adihdmi)
+{
+	unsigned int irq0;
+	int ret;
+
+	ret = regmap_read(adihdmi->regmap, ADIHDMI_REG_INT(0), &irq0);
+	if (ret < 0)
+		return false;
+
+	if (irq0 & ADIHDMI_INT0_HDP) {
+		regmap_write(adihdmi->regmap, ADIHDMI_REG_INT(0),
+				ADIHDMI_INT0_HDP);
+		return true;
+	}
+
+	return false;
+}
+
+static int adihdmi_irq_process(struct adihdmi *adihdmi)
+{
+	unsigned int irq0, irq1;
+	int ret;
+
+	ret = regmap_read(adihdmi->regmap, ADIHDMI_REG_INT(0), &irq0);
+	if (ret < 0)
+		return ret;
+
+	ret = regmap_read(adihdmi->regmap, ADIHDMI_REG_INT(1), &irq1);
+	if (ret < 0)
+		return ret;
+
+	regmap_write(adihdmi->regmap, ADIHDMI_REG_INT(0), irq0);
+	regmap_write(adihdmi->regmap, ADIHDMI_REG_INT(1), irq1);
+
+	if (irq0 & ADIHDMI_INT0_HDP)
+		drm_helper_hpd_irq_event(adihdmi->encoder->dev);
+
+	if (irq0 & ADIHDMI_INT0_EDID_READY || irq1 & ADIHDMI_INT1_DDC_ERROR) {
+		adihdmi->edid_read = true;
+
+		if (adihdmi->i2c_main->irq)
+			wake_up_all(&adihdmi->wq);
+	}
+
+	return 0;
+}
+
+static irqreturn_t adihdmi_irq_handler(int irq, void *devid)
+{
+	struct adihdmi *adihdmi = devid;
+	int ret;
+
+	ret = adihdmi_irq_process(adihdmi);
+	return ret < 0 ? IRQ_NONE : IRQ_HANDLED;
+}
+
+/* -----------------------------------------------------------------------------
+ * EDID retrieval
+ */
+
+static int adihdmi_wait_for_edid(struct adihdmi *adihdmi, int timeout)
+{
+	int ret;
+
+	if (adihdmi->i2c_main->irq) {
+		ret = wait_event_interruptible_timeout(adihdmi->wq,
+				adihdmi->edid_read, msecs_to_jiffies(timeout));
+	} else {
+		for (; timeout > 0; timeout -= 25) {
+			ret = adihdmi_irq_process(adihdmi);
+			if (ret < 0)
+				break;
+
+			if (adihdmi->edid_read)
+				break;
+
+			msleep(25);
+		}
+	}
+
+	return adihdmi->edid_read ? 0 : -EIO;
+}
+
+static int adihdmi_get_edid_block(void *data, u8 *buf, unsigned int block,
+		size_t len)
+{
+	struct adihdmi *adihdmi = data;
+	struct i2c_msg xfer[2];
+	uint8_t offset;
+	unsigned int i;
+	int ret;
+
+	if (len > 128)
+		return -EINVAL;
+
+	if (adihdmi->current_edid_segment != block / 2) {
+		unsigned int status;
+
+		ret = regmap_read(adihdmi->regmap, ADIHDMI_REG_DDC_STATUS,
+				&status);
+		if (ret < 0)
+			return ret;
+
+		if (status != 2) {
+			adihdmi->edid_read = false;
+			regmap_write(adihdmi->regmap, ADIHDMI_REG_EDID_SEGMENT,
+					block);
+			ret = adihdmi_wait_for_edid(adihdmi, 200);
+			if (ret < 0)
+				return ret;
+		}
+
+		/* Break this apart, hopefully more I2C controllers will
+		 * support 64 byte transfers than 256 byte transfers
+		 */
+
+		xfer[0].addr = adihdmi->i2c_edid->addr;
+		xfer[0].flags = 0;
+		xfer[0].len = 1;
+		xfer[0].buf = &offset;
+		xfer[1].addr = adihdmi->i2c_edid->addr;
+		xfer[1].flags = I2C_M_RD;
+		xfer[1].len = 64;
+		xfer[1].buf = adihdmi->edid_buf;
+
+		offset = 0;
+
+		for (i = 0; i < 4; ++i) {
+			ret = i2c_transfer(adihdmi->i2c_edid->adapter, xfer,
+					ARRAY_SIZE(xfer));
+			if (ret < 0)
+				return ret;
+			else if (ret != 2)
+				return -EIO;
+
+			xfer[1].buf += 64;
+			offset += 64;
+		}
+
+		adihdmi->current_edid_segment = block / 2;
+	}
+
+	if (block % 2 == 0)
+		memcpy(buf, adihdmi->edid_buf, len);
+	else
+		memcpy(buf, adihdmi->edid_buf + 128, len);
+
+	return 0;
+}
+
+static int adihdmi_mode_valid(struct drm_display_mode *mode)
+{
+	if (mode->clock > 165000)
+		return MODE_CLOCK_HIGH;
+
+	return MODE_OK;
+}
+
+/* -----------------------------------------------------------------------------
+ * DT and private structure operations
+ */
+
+#define conn_to_adihdmi2(x) \
+	container_of(x, struct adihdmi2, connector);
+
+#define enc_to_adihdmi2(x) \
+	container_of(x, struct adihdmi2, encoder);
+
+#define enc_to_adihdmi(x) \
+	(&(container_of(x, struct adihdmi2, encoder)->base))
+
+static int adihdmi_parse_dt(struct device_node *np,
+		struct adihdmi_link_config *config)
+{
+	memset(config, 0, sizeof(*config));
+
+	config->input_color_depth = 8;
+
+	config->input_colorspace = HDMI_COLORSPACE_RGB;
+	//config->input_colorspace = HDMI_COLORSPACE_YUV422;
+	//config->input_colorspace = HDMI_COLORSPACE_YUV444;
+
+	config->input_clock = ADIHDMI_INPUT_CLOCK_1X;
+	//config->input_clock = ADIHDMI_INPUT_CLOCK_2X;
+	//config->input_clock = ADIHDMI_INPUT_CLOCK_DDR;
+
+	if (config->input_colorspace == HDMI_COLORSPACE_YUV422 ||
+			config->input_clock != ADIHDMI_INPUT_CLOCK_1X) {
+
+		config->input_style = 1;
+		//config->input_justification = ADIHDMI_INPUT_JUSTIFICATION_LEFT;
+		config->input_justification = ADIHDMI_INPUT_JUSTIFICATION_EVENLY;
+		//config->input_justification = ADIHDMI_INPUT_JUSTIFICATION_RIGHT;
+
+	} else {
+		config->input_style = 1;
+		config->input_justification = ADIHDMI_INPUT_JUSTIFICATION_LEFT;
+	}
+
+	config->clock_delay = 0;
+	config->embedded_sync = 0;
+
+	/* Hardcode the sync pulse configurations for now. */
+	config->sync_pulse = ADIHDMI_INPUT_SYNC_PULSE_NONE;
+	config->vsync_polarity = ADIHDMI_SYNC_POLARITY_PASSTHROUGH;
+	config->hsync_polarity = ADIHDMI_SYNC_POLARITY_PASSTHROUGH;
+
+	return 0;
+}
+
+static const int edid_i2c_addr = 0x7e;
+static const int packet_i2c_addr = 0x70;
+static const int cec_i2c_addr = 0x78;
+
+static int adihdmi_create(struct i2c_client *i2c, struct adihdmi *adihdmi)
+{
+	struct adihdmi_link_config link_config;
+	struct device *dev = &i2c->dev;
+	unsigned int val;
+	int ret;
+
+	adihdmi->powered = false;
+	adihdmi->status = connector_status_disconnected;
+
+	ret = adihdmi_parse_dt(NULL, &link_config);
+	if (ret)
+	{
+		pr_err("%s - %d - Bad parse\n", __FUNCTION__, __LINE__);
+		return -EINVAL;
+	}
+
+	/*
+	 * The power down GPIO is optional. If present, toggle it from active to
+	 * inactive to wake up the encoder.
+	 */
+	adihdmi->gpio_pd = devm_gpiod_get_optional(dev, "pd", GPIOD_OUT_HIGH);
+	if (IS_ERR(adihdmi->gpio_pd))
+	{
+		pr_err("%s - %d - Bad PD GPIO\n", __FUNCTION__, __LINE__);
+		return PTR_ERR(adihdmi->gpio_pd);
+	}
+
+	if (adihdmi->gpio_pd) {
+		mdelay(5);
+		gpiod_set_value_cansleep(adihdmi->gpio_pd, 0);
+	}
+
+	adihdmi->regmap = devm_regmap_init_i2c(i2c, &adihdmi_regmap_config);
+	if (IS_ERR(adihdmi->regmap))
+	{
+		pr_err("%s - %d - Bad reg map init\n", __FUNCTION__, __LINE__);
+		return PTR_ERR(adihdmi->regmap);
+	}
+
+	ret = regmap_read(adihdmi->regmap, ADIHDMI_REG_CHIP_REVISION, &val);
+	if (ret)
+	{
+		pr_err("%s - %d - Bad reg map read\n", __FUNCTION__, __LINE__);
+		return ret;
+	}
+	dev_dbg(dev, "Rev. %d\n", val);
+
+	ret = regmap_register_patch(adihdmi->regmap, adihdmi_fixed_registers,
+			ARRAY_SIZE(adihdmi_fixed_registers));
+	if (ret)
+	{
+		pr_err("%s - %d - Bad reg map patch\n", __FUNCTION__, __LINE__);
+		return ret;
+	}
+
+	regmap_write(adihdmi->regmap, ADIHDMI_REG_EDID_I2C_ADDR, edid_i2c_addr);
+	regmap_write(adihdmi->regmap, ADIHDMI_REG_PACKET_I2C_ADDR,
+			packet_i2c_addr);
+	regmap_write(adihdmi->regmap, ADIHDMI_REG_CEC_I2C_ADDR, cec_i2c_addr);
+	adihdmi_packet_disable(adihdmi, 0xffff);
+
+	adihdmi->i2c_main = i2c;
+	adihdmi->i2c_edid = i2c_new_dummy(i2c->adapter, edid_i2c_addr >> 1);
+	if (!adihdmi->i2c_edid)
+	{
+		pr_err("%s - %d - No mem for EDID\n", __FUNCTION__, __LINE__);
+		return -ENOMEM;
+	}
+
+	if (i2c->irq) {
+		init_waitqueue_head(&adihdmi->wq);
+
+		ret = devm_request_threaded_irq(dev, i2c->irq, NULL,
+				adihdmi_irq_handler,
+				IRQF_ONESHOT, dev_name(dev),
+				adihdmi);
+		if (ret)
+		{
+			pr_err("%s - %d - Bad IRQ thread request\n", __FUNCTION__, __LINE__);
+			goto err_i2c_unregister_device;
+		}
+	}
+
+	/* CEC is unused for now */
+	regmap_write(adihdmi->regmap, ADIHDMI_REG_CEC_CTRL,
+			ADIHDMI_CEC_CTRL_POWER_DOWN);
+
+	adihdmi_power_off(adihdmi);
+
+	adihdmi_set_link_config(adihdmi, &link_config);
+
+	adihdmi_audio_setup(adihdmi);
+
+	return 0;
+
+err_i2c_unregister_device:
+	i2c_unregister_device(adihdmi->i2c_edid);
+
+	return ret;
+}
+
+static void adihdmi_destroy(struct adihdmi *priv)
+{
+	i2c_unregister_device(priv->i2c_edid);
+}
+
+/* -----------------------------------------------------------------------------
+ * Encoder operations
+ */
+
+static int adihdmi_encoder_get_modes(struct adihdmi *adihdmi,
+		struct drm_connector *connector)
+{
+	struct edid *edid;
+	unsigned int count;
+
+	/* Reading the EDID only works if the device is powered */
+	if (!adihdmi->powered) {
+		regmap_write(adihdmi->regmap, ADIHDMI_REG_INT(0),
+				ADIHDMI_INT0_EDID_READY);
+		regmap_write(adihdmi->regmap, ADIHDMI_REG_INT(1),
+				ADIHDMI_INT1_DDC_ERROR);
+		regmap_update_bits(adihdmi->regmap, ADIHDMI_REG_POWER,
+				ADIHDMI_POWER_POWER_DOWN, 0);
+		adihdmi->current_edid_segment = -1;
+	}
+
+	edid = drm_do_get_edid(connector, adihdmi_get_edid_block, adihdmi);
+
+	if (!adihdmi->powered)
+		regmap_update_bits(adihdmi->regmap, ADIHDMI_REG_POWER,
+				ADIHDMI_POWER_POWER_DOWN,
+				ADIHDMI_POWER_POWER_DOWN);
+
+	kfree(adihdmi->edid);
+	adihdmi->edid = edid;
+	if (!edid)
+	{
+		pr_err("%s - %d - No EDID\n", __FUNCTION__, __LINE__);
+		return 0;
+	}
+
+	drm_mode_connector_update_edid_property(connector, edid);
+	count = drm_add_edid_modes(connector, edid);
+
+	adihdmi_set_config_csc(adihdmi, connector, adihdmi->rgb);
+
+	return count;
+}
+
+static void adihdmi_encoder_dpms(struct drm_encoder *encoder, int mode)
+{
+	struct adihdmi2 *priv2 = enc_to_adihdmi2(encoder);
+
+	if (mode == DRM_MODE_DPMS_ON)
+		adihdmi_power_on(&priv2->base);
+	else
+		adihdmi_power_off(&priv2->base);
+}
+
+	static enum drm_connector_status
+adihdmi_encoder_detect(struct adihdmi *adihdmi,
+		struct drm_connector *connector)
+{
+	enum drm_connector_status status;
+	unsigned int val;
+	bool hpd;
+	int ret;
+
+	ret = regmap_read(adihdmi->regmap, ADIHDMI_REG_STATUS, &val);
+	if (ret < 0)
+	{
+		pr_err("%s - %d - Disconnected\n", __FUNCTION__, __LINE__);
+		return connector_status_disconnected;
+	}
+
+	if (val & ADIHDMI_STATUS_HPD)
+		status = connector_status_connected;
+	else
+		status = connector_status_disconnected;
+
+	hpd = adihdmi_hpd(adihdmi);
+
+	/* The chip resets itself when the cable is disconnected, so in case
+	 * there is a pending HPD interrupt and the cable is connected there was
+	 * at least one transition from disconnected to connected and the chip
+	 * has to be reinitialized. */
+	if (status == connector_status_connected && hpd && adihdmi->powered) {
+		regcache_mark_dirty(adihdmi->regmap);
+		adihdmi_power_on(adihdmi);
+		adihdmi_encoder_get_modes(adihdmi, connector);
+		if (adihdmi->status == connector_status_connected)
+			status = connector_status_disconnected;
+	} else {
+		/* Renable HDP sensing */
+		regmap_update_bits(adihdmi->regmap, ADIHDMI_REG_POWER2,
+				ADIHDMI_REG_POWER2_HDP_SRC_MASK,
+				ADIHDMI_REG_POWER2_HDP_SRC_BOTH);
+	}
+
+	adihdmi->status = status;
+	return status;
+}
+
+static bool adihdmi_encoder_mode_fixup(struct drm_encoder *encoder, const struct drm_display_mode *mode, struct drm_display_mode *adjusted_mode)
+{
+	return true;
+}
+
+static int adihdmi_encoder_mode_valid(struct drm_encoder *encoder, struct drm_display_mode *mode)
+{
+	return adihdmi_mode_valid(mode);
+}
+
+static void adihdmi_encoder_mode_set(struct drm_encoder *encoder,
+		struct drm_display_mode *mode,
+		struct drm_display_mode *adj_mode)
+{
+	unsigned int low_refresh_rate;
+	unsigned int hsync_polarity = 0;
+	unsigned int vsync_polarity = 0;
+	struct adihdmi *adihdmi = enc_to_adihdmi(encoder);
+
+	if (adihdmi->embedded_sync) {
+		unsigned int hsync_offset, hsync_len;
+		unsigned int vsync_offset, vsync_len;
+
+		hsync_offset = adj_mode->crtc_hsync_start -
+			adj_mode->crtc_hdisplay;
+		vsync_offset = adj_mode->crtc_vsync_start -
+			adj_mode->crtc_vdisplay;
+		hsync_len = adj_mode->crtc_hsync_end -
+			adj_mode->crtc_hsync_start;
+		vsync_len = adj_mode->crtc_vsync_end -
+			adj_mode->crtc_vsync_start;
+
+		/* The hardware vsync generator has a off-by-one bug */
+		vsync_offset += 1;
+
+		regmap_write(adihdmi->regmap, ADIHDMI_REG_HSYNC_PLACEMENT_MSB,
+				((hsync_offset >> 10) & 0x7) << 5);
+		regmap_write(adihdmi->regmap, ADIHDMI_REG_SYNC_DECODER(0),
+				(hsync_offset >> 2) & 0xff);
+		regmap_write(adihdmi->regmap, ADIHDMI_REG_SYNC_DECODER(1),
+				((hsync_offset & 0x3) << 6) |
+				((hsync_len >> 4) & 0x3f));
+		regmap_write(adihdmi->regmap, ADIHDMI_REG_SYNC_DECODER(2),
+				((hsync_len & 0xf) << 4) |
+				((vsync_offset >> 6) & 0xf));
+		regmap_write(adihdmi->regmap, ADIHDMI_REG_SYNC_DECODER(3),
+				((vsync_offset & 0x3f) << 2) |
+				((vsync_len >> 8) & 0x3));
+		regmap_write(adihdmi->regmap, ADIHDMI_REG_SYNC_DECODER(4),
+				vsync_len & 0xff);
+
+		hsync_polarity = !(adj_mode->flags & DRM_MODE_FLAG_PHSYNC);
+		vsync_polarity = !(adj_mode->flags & DRM_MODE_FLAG_PVSYNC);
+	} else {
+		enum adihdmi_sync_polarity mode_hsync_polarity;
+		enum adihdmi_sync_polarity mode_vsync_polarity;
+
+		/**
+		 * If the input signal is always low or always high we want to
+		 * invert or let it passthrough depending on the polarity of the
+		 * current mode.
+		 **/
+		if (adj_mode->flags & DRM_MODE_FLAG_NHSYNC)
+			mode_hsync_polarity = ADIHDMI_SYNC_POLARITY_LOW;
+		else
+			mode_hsync_polarity = ADIHDMI_SYNC_POLARITY_HIGH;
+
+		if (adj_mode->flags & DRM_MODE_FLAG_NVSYNC)
+			mode_vsync_polarity = ADIHDMI_SYNC_POLARITY_LOW;
+		else
+			mode_vsync_polarity = ADIHDMI_SYNC_POLARITY_HIGH;
+
+		if (adihdmi->hsync_polarity != mode_hsync_polarity &&
+				adihdmi->hsync_polarity !=
+				ADIHDMI_SYNC_POLARITY_PASSTHROUGH)
+			hsync_polarity = 1;
+
+		if (adihdmi->vsync_polarity != mode_vsync_polarity &&
+				adihdmi->vsync_polarity !=
+				ADIHDMI_SYNC_POLARITY_PASSTHROUGH)
+			vsync_polarity = 1;
+	}
+
+	if (mode->vrefresh <= 24000)
+		low_refresh_rate = ADIHDMI_LOW_REFRESH_RATE_24HZ;
+	else if (mode->vrefresh <= 25000)
+		low_refresh_rate = ADIHDMI_LOW_REFRESH_RATE_25HZ;
+	else if (mode->vrefresh <= 30000)
+		low_refresh_rate = ADIHDMI_LOW_REFRESH_RATE_30HZ;
+	else
+		low_refresh_rate = ADIHDMI_LOW_REFRESH_RATE_NONE;
+
+	regmap_update_bits(adihdmi->regmap, 0xfb,
+			0x6, low_refresh_rate << 1);
+	regmap_update_bits(adihdmi->regmap, 0x17,
+			0x60, (vsync_polarity << 6) | (hsync_polarity << 5));
+
+	/*
+	 * TODO Test first order 4:2:2 to 4:4:4 up conversion method, which is
+	 * supposed to give better results.
+	 */
+
+			adihdmi->f_tmds = mode->clock;
+}
+
+static void adihdmi_encoder_prepare(struct drm_encoder *encoder)
+{
+	adihdmi_encoder_dpms(encoder, DRM_MODE_DPMS_OFF);
+}
+
+static void adihdmi_encoder_commit(struct drm_encoder *encoder)
+{
+	adihdmi_encoder_dpms(encoder, DRM_MODE_DPMS_ON);
+}
+
+static struct drm_encoder_helper_funcs adihdmi_encoder_helper_funcs = {
+	.dpms		= adihdmi_encoder_dpms,
+	.mode_fixup	= adihdmi_encoder_mode_fixup,
+	.prepare	= adihdmi_encoder_prepare,
+	.commit		= adihdmi_encoder_commit,
+	.mode_set	= adihdmi_encoder_mode_set,
+};
+
+static void adihdmi_encoder_destroy(struct drm_encoder *encoder)
+{
+	struct adihdmi2 *priv = enc_to_adihdmi2(encoder);
+
+	adihdmi_destroy(&priv->base);
+	drm_encoder_cleanup(encoder);
+}
+
+static const struct drm_encoder_funcs adihdmi_encoder_funcs = {
+	.destroy = adihdmi_encoder_destroy,
+};
+
+/* -----------------------------------------------------------------------------
+ * Slave operations
+ */
+
+static int adihdmi_encoder_slave_create_resources(struct drm_encoder *encoder, struct drm_connector *connector)
+{
+	pr_debug("%s - %d\n", __FUNCTION__, __LINE__);
+	return 0;
+}
+
+static void adihdmi_encoder_slave_destroy(struct drm_encoder *encoder)
+{
+	pr_debug("%s - %d\n", __FUNCTION__, __LINE__);
+}
+
+	static enum drm_connector_status
+adihdmi_encoder_slave_detect(struct drm_encoder *encoder,
+		struct drm_connector *connector)
+{
+	return adihdmi_encoder_detect(enc_to_adihdmi(encoder),
+			connector);
+}
+
+static int adihdmi_encoder_slave_get_modes(struct drm_encoder *encoder,
+		struct drm_connector *connector)
+{
+	return adihdmi_encoder_get_modes(enc_to_adihdmi(encoder),
+			connector);
+}
+
+
+static void adihdmi_encoder_slave_set_config(struct drm_encoder *encoder, void *params)
+{
+	pr_debug("%s - %d\n", __FUNCTION__, __LINE__);
+}
+
+static int adihdmi_encoder_set_property(struct drm_encoder *encoder, struct drm_connector *connector, struct drm_property *property, uint64_t val)
+{
+	pr_debug("%s - %d\n", __FUNCTION__, __LINE__);
+	return 0;
+}
+
+static struct drm_encoder_slave_funcs adihdmi_encoder_slave_funcs = {
+	.create_resources   = adihdmi_encoder_slave_create_resources,
+	.destroy            = adihdmi_encoder_slave_destroy,
+	.detect             = adihdmi_encoder_slave_detect,
+	.dpms               = adihdmi_encoder_dpms,
+	.get_modes          = adihdmi_encoder_slave_get_modes,
+	.mode_fixup         = adihdmi_encoder_mode_fixup,
+	.mode_set           = adihdmi_encoder_mode_set,
+	.mode_valid         = adihdmi_encoder_mode_valid,
+	.set_config         = adihdmi_encoder_slave_set_config,
+	.set_property       = adihdmi_encoder_set_property,
+};
+
+/* -----------------------------------------------------------------------------
+ * Connector operations
+ */
+
+static int adihdmi_connector_get_modes(struct drm_connector *connector)
+{
+	struct adihdmi2 *priv = conn_to_adihdmi2(connector);
+
+	return adihdmi_encoder_get_modes(&priv->base, connector);
+}
+
+static int adihdmi_connector_mode_valid(struct drm_connector *connector,
+		struct drm_display_mode *mode)
+{
+	return adihdmi_mode_valid(mode);
+}
+
+	static struct drm_encoder *
+adihdmi_connector_best_encoder(struct drm_connector *connector)
+{
+	struct adihdmi2 *priv = conn_to_adihdmi2(connector);
+
+	return &priv->encoder;
+}
+
+static struct drm_connector_helper_funcs adihdmi_connector_helper_funcs = {
+	.get_modes          = adihdmi_connector_get_modes,
+	.mode_valid         = adihdmi_connector_mode_valid,
+	.best_encoder	    = adihdmi_connector_best_encoder,
+};
+
+	static enum drm_connector_status
+adihdmi_connector_detect(struct drm_connector *connector, bool force)
+{
+	struct adihdmi2 *priv = conn_to_adihdmi2(connector);
+
+	return adihdmi_encoder_detect(&priv->base, connector);
+}
+
+static void adihdmi_connector_destroy(struct drm_connector *connector)
+{
+	drm_connector_unregister(connector);
+	drm_connector_cleanup(connector);
+}
+
+static struct drm_connector_funcs adihdmi_connector_funcs = {
+	.dpms = drm_helper_connector_dpms,
+	.fill_modes = drm_helper_probe_single_connector_modes,
+	.detect = adihdmi_connector_detect,
+	.destroy = adihdmi_connector_destroy,
+};
+
+/* -----------------------------------------------------------------------------
+ * Component operations
+ */
+
+static int adihdmi_bind(struct device *dev, struct device *master, void *data)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	struct drm_device *drm = data;
+	struct adihdmi2 *priv;
+	uint32_t crtcs = 0;
+	int ret;
+
+	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+	{
+		pr_err("%s - %d - No memory for ADIHDMI\n", __FUNCTION__, __LINE__);
+		return -ENOMEM;
+	}
+
+	dev_set_drvdata(dev, priv);
+
+	if (dev->of_node)
+		crtcs = drm_of_find_possible_crtcs(drm, dev->of_node);
+
+	/* If no CRTCs were found, fall back to our old behaviour */
+	if (crtcs == 0) {
+		dev_warn(dev, "Falling back to first CRTC\n");
+		crtcs = 1 << 0;
+	}
+
+	priv->base.encoder = &priv->encoder;
+	priv->connector.interlace_allowed = 1;
+	priv->encoder.possible_crtcs = crtcs;
+
+	ret = adihdmi_create(client, &priv->base);
+	if (ret)
+		return ret;
+
+	drm_encoder_helper_add(&priv->encoder, &adihdmi_encoder_helper_funcs);
+	ret = drm_encoder_init(drm, &priv->encoder, &adihdmi_encoder_funcs,
+			DRM_MODE_ENCODER_TMDS, NULL);
+	if (ret)
+		goto err_encoder;
+
+	drm_connector_helper_add(&priv->connector,
+			&adihdmi_connector_helper_funcs);
+	ret = drm_connector_init(drm, &priv->connector,
+			&adihdmi_connector_funcs,
+			DRM_MODE_CONNECTOR_HDMIA);
+	if (ret)
+		goto err_connector;
+
+	ret = drm_connector_register(&priv->connector);
+	if (ret)
+		goto err_sysfs;
+
+	priv->connector.encoder = &priv->encoder;
+	drm_mode_connector_attach_encoder(&priv->connector, &priv->encoder);
+
+	return 0;
+
+err_sysfs:
+	drm_connector_cleanup(&priv->connector);
+err_connector:
+	drm_encoder_cleanup(&priv->encoder);
+err_encoder:
+	adihdmi_destroy(&priv->base);
+	return ret;
+
+
+}
+
+static void adihdmi_unbind(struct device *dev, struct device *master, void *data)
+{
+	struct adihdmi2 *priv = dev_get_drvdata(dev);
+
+	drm_connector_cleanup(&priv->connector);
+	drm_encoder_cleanup(&priv->encoder);
+	adihdmi_destroy(&priv->base);
+}
+
+static const struct component_ops adihdmi_ops =
+{
+	.bind = adihdmi_bind,
+	.unbind = adihdmi_unbind,
+};
+
+/* -----------------------------------------------------------------------------
+ * Init operations
+ */
+
+static int adihdmi_probe(struct i2c_client *i2c, const struct i2c_device_id *id)
+{
+	return component_add(&i2c->dev, &adihdmi_ops);
+}
+
+static int adihdmi_remove(struct i2c_client *i2c)
+{
+	component_del(&i2c->dev, &adihdmi_ops);
+
+	return 0;
+}
+
+static int adihdmi_encoder_init(struct i2c_client *i2c, struct drm_device *dev,
+		struct drm_encoder_slave *encoder_slave)
+{
+
+	struct adihdmi *adihdmi;
+	int ret;
+
+	adihdmi = kzalloc(sizeof(*adihdmi), GFP_KERNEL);
+	if (!adihdmi)
+		return -ENOMEM;
+
+	adihdmi->encoder = &encoder_slave->base;
+
+	ret = adihdmi_create(i2c, adihdmi);
+	if (ret) {
+		kfree(adihdmi);
+		return ret;
+	}
+
+	encoder_slave->slave_priv = adihdmi;
+	encoder_slave->slave_funcs = &adihdmi_encoder_slave_funcs;
+
+	return 0;
+}
+
+static const struct i2c_device_id adihdmi_i2c_ids[] = {
+	{ "adv7511", 0 },
+	{ "adv7511w", 0 },
+	{ "adv7513", 0 },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, adihdmi_i2c_ids);
+
+static const struct of_device_id adihdmi_of_ids[] = {
+	{ .compatible = "adi,adv7511", },
+	{ .compatible = "adi,adv7511w", },
+	{ .compatible = "adi,adv7513", },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, adihdmi_of_ids);
+
+static struct drm_i2c_encoder_driver adihdmi_driver = {
+	.i2c_driver = {
+		.driver = {
+			.name = "adihdmi",
+			.of_match_table = adihdmi_of_ids,
+		},
+		.id_table = adihdmi_i2c_ids,
+		.probe = adihdmi_probe,
+		.remove = adihdmi_remove,
+	},
+
+	.encoder_init = adihdmi_encoder_init,
+};
+
+static int __init adihdmi_init(void)
+{
+	return drm_i2c_encoder_register(THIS_MODULE, &adihdmi_driver);
+}
+module_init(adihdmi_init);
+
+static void __exit adihdmi_exit(void)
+{
+	drm_i2c_encoder_unregister(&adihdmi_driver);
+}
+module_exit(adihdmi_exit);
+
+MODULE_AUTHOR("Lars-Peter Clausen <lars@metafoo.de>");
+MODULE_DESCRIPTION("ADIHDMI HDMI transmitter driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/omapdrm/displays/encoder-tpd12s015.c b/drivers/gpu/drm/omapdrm/displays/encoder-tpd12s015.c
index 46855c8..d189426 100644
--- a/drivers/gpu/drm/omapdrm/displays/encoder-tpd12s015.c
+++ b/drivers/gpu/drm/omapdrm/displays/encoder-tpd12s015.c
@@ -234,25 +234,30 @@ static int tpd_probe(struct platform_device *pdev)
 	if (r)
 		return r;
 
-
 	gpio = devm_gpiod_get_index_optional(&pdev->dev, NULL, 0,
 		 GPIOD_OUT_LOW);
-	if (IS_ERR(gpio))
+	if (IS_ERR(gpio)) {
+		r = PTR_ERR(gpio);
 		goto err_gpio;
+	}
 
 	ddata->ct_cp_hpd_gpio = gpio;
 
 	gpio = devm_gpiod_get_index_optional(&pdev->dev, NULL, 1,
 		 GPIOD_OUT_LOW);
-	if (IS_ERR(gpio))
+	if (IS_ERR(gpio)) {
+		r = PTR_ERR(gpio);
 		goto err_gpio;
+	}
 
 	ddata->ls_oe_gpio = gpio;
 
 	gpio = devm_gpiod_get_index(&pdev->dev, NULL, 2,
 		GPIOD_IN);
-	if (IS_ERR(gpio))
+	if (IS_ERR(gpio)) {
+		r = PTR_ERR(gpio);
 		goto err_gpio;
+	}
 
 	ddata->hpd_gpio = gpio;
 
diff --git a/drivers/input/misc/tps65218-pwrbutton.c b/drivers/input/misc/tps65218-pwrbutton.c
index 3273217..6f26022 100644
--- a/drivers/input/misc/tps65218-pwrbutton.c
+++ b/drivers/input/misc/tps65218-pwrbutton.c
@@ -36,7 +36,7 @@ struct tps6521x_data {
 static const struct tps6521x_data tps65217_data = {
 	.reg_status = TPS65217_REG_STATUS,
 	.pb_mask = TPS65217_STATUS_PB,
-	.name = "tps65217_pwrbutton",
+	.name = "tps65217_pwr_but",
 };
 
 static const struct tps6521x_data tps65218_data = {
diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index 64971ba..0fbd1c5 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -766,6 +766,36 @@ config PANEL_BOOT_MESSAGE
 	  An empty message will only clear the display at driver init time. Any other
 	  printf()-formatted message is valid with newline and escape codes.
 
+config BONE_CAPEMGR
+	tristate "Beaglebone cape manager"
+	depends on ARCH_OMAP2PLUS && OF
+	select EEPROM
+	select OF_OVERLAY
+	help
+	  Say Y here to include support for automatic loading of
+	  beaglebone capes. Select M to build as a module which
+	  will be named bone_capemgr.
+
+config DEV_OVERLAYMGR
+	tristate "Device overlay manager"
+	depends on OF
+	select OF_OVERLAY
+	default n
+	help
+	  Say Y here to include support for the automagical dev
+	  overlay manager.
+
+config TIEQEP
+	tristate "EQEP Hardware quadrature encoder controller"
+	depends on SOC_AM33XX
+	select PWM_TIPWMSS
+	help
+	  Driver support for the EQEP quadrature encoder controller AM33XX
+	  TI SOC
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called tieqep.
+
 source "drivers/misc/c2port/Kconfig"
 source "drivers/misc/eeprom/Kconfig"
 source "drivers/misc/cb710/Kconfig"
@@ -775,7 +805,9 @@ source "drivers/misc/altera-stapl/Kconfig"
 source "drivers/misc/mei/Kconfig"
 source "drivers/misc/vmw_vmci/Kconfig"
 source "drivers/misc/mic/Kconfig"
+source "drivers/misc/cape_bone_argus/Kconfig"
 source "drivers/misc/genwqe/Kconfig"
+source "drivers/misc/cape/Kconfig"
 source "drivers/misc/echo/Kconfig"
 source "drivers/misc/cxl/Kconfig"
 endmenu
diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile
index 3198336..7fc8cfd 100644
--- a/drivers/misc/Makefile
+++ b/drivers/misc/Makefile
@@ -48,11 +48,16 @@ obj-$(CONFIG_VMWARE_VMCI)	+= vmw_vmci/
 obj-$(CONFIG_LATTICE_ECP3_CONFIG)	+= lattice-ecp3-config.o
 obj-$(CONFIG_SRAM)		+= sram.o
 obj-y				+= mic/
+obj-y				+= cape_bone_argus/
 obj-$(CONFIG_GENWQE)		+= genwqe/
+obj-y				+= cape/
 obj-$(CONFIG_ECHO)		+= echo/
 obj-$(CONFIG_VEXPRESS_SYSCFG)	+= vexpress-syscfg.o
 obj-$(CONFIG_CXL_BASE)		+= cxl/
 obj-$(CONFIG_PANEL)             += panel.o
+obj-$(CONFIG_TIEQEP)		+= tieqep.o
+obj-$(CONFIG_BONE_CAPEMGR)	+= bone_capemgr.o
+obj-$(CONFIG_DEV_OVERLAYMGR)	+= devovmgr.o
 
 lkdtm-$(CONFIG_LKDTM)		+= lkdtm_core.o
 lkdtm-$(CONFIG_LKDTM)		+= lkdtm_bugs.o
diff --git b/drivers/misc/bone_capemgr.c b/drivers/misc/bone_capemgr.c
new file mode 100644
index 0000000..78c5f44
--- /dev/null
+++ b/drivers/misc/bone_capemgr.c
@@ -0,0 +1,1898 @@
+/*
+ * TI Beaglebone cape manager
+ *
+ * Copyright (C) 2012 Texas Instruments Inc.
+ * Copyright (C) 2012-2015 Konsulko Group.
+ * Author: Pantelis Antoniou <pantelis.antoniou@konsulko.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/interrupt.h>
+#include <linux/completion.h>
+#include <linux/platform_device.h>
+#include <linux/clk.h>
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/of_fdt.h>
+#include <linux/slab.h>
+#include <linux/pm_runtime.h>
+#include <linux/pinctrl/consumer.h>
+#include <linux/firmware.h>
+#include <linux/err.h>
+#include <linux/ctype.h>
+#include <linux/string.h>
+#include <linux/memory.h>
+#include <linux/kthread.h>
+#include <linux/wait.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/nvmem-consumer.h>
+
+/* disabled capes */
+static char *disable_partno;
+module_param(disable_partno, charp, 0444);
+MODULE_PARM_DESC(disable_partno,
+		"Comma delimited list of PART-NUMBER[:REV] of disabled capes");
+
+/* enable capes */
+static char *enable_partno;
+module_param(enable_partno, charp, 0444);
+MODULE_PARM_DESC(enable_partno,
+		"Comma delimited list of PART-NUMBER[:REV] of enabled capes");
+
+/* delay to scan on boot until rootfs appears */
+static int boot_scan_period = 1000;
+module_param(boot_scan_period, int, 0444);
+MODULE_PARM_DESC(boot_scan_period,
+		"boot scan period until rootfs firmware is available");
+
+static int uboot_capemgr_enabled = 0;
+module_param(uboot_capemgr_enabled, int, 0444);
+MODULE_PARM_DESC(uboot_capemgr_enabled,
+		 "U-Boot Cape Manager is enabled (0=Kernel Cape Manager [default], 1=Disable Kernel Cape Manager)");
+
+struct capemgr_info;
+
+struct slot_ee_attribute {
+	struct device_attribute devattr;
+	unsigned int field;
+	struct bone_cape_slot *slot;	/* this is filled when instantiated */
+};
+#define to_slot_ee_attribute(x) \
+	container_of((x), struct slot_ee_attribute, devattr)
+
+struct bbrd_ee_attribute {
+	struct device_attribute devattr;
+	unsigned int field;
+};
+#define to_bbrd_ee_attribute(x) \
+	container_of((x), struct bbrd_ee_attribute, devattr)
+
+struct bone_cape_slot {
+	struct list_head	node;
+	struct capemgr_info	*info;
+	int			slotno;
+	struct nvmem_cell	*nvmem_cell;
+
+	char			text_id[256];
+	char			signature[256];
+	/* quick access */
+	char			board_name[32+1];
+	char			version[4+1];
+	char			manufacturer[16+1];
+	char			part_number[16+1];
+
+	/* attribute group */
+	char			*ee_attr_name;
+	int			ee_attrs_count;
+	struct slot_ee_attribute *ee_attrs;
+	struct attribute	**ee_attrs_tab;
+	struct attribute_group	attrgroup;
+
+	/* state flags */
+	unsigned int		probed : 1;
+	unsigned int		probe_failed : 1;
+	unsigned int		override : 1;
+	unsigned int		loading : 1;
+	unsigned int		loaded : 1;
+	unsigned int		retry_loading : 1;
+	unsigned int		disabled : 1;
+
+	char			*dtbo;
+	const struct firmware	*fw;
+	struct device_node	*overlay;
+	int			overlay_id;
+
+	/* loader thread */
+	struct task_struct	*loader_thread;
+
+	/* load priority */
+	int priority;
+};
+
+struct bone_baseboard {
+
+	/* from the matched boardmap node */
+	char			*compatible_name;
+
+	/* filled in by reading the eeprom */
+	char			signature[256];
+	char			text_id[64+1];
+
+	/* quick access */
+	char			board_name[8+1];
+	char			revision[4+1];
+	char			serial_number[12+1];
+
+	/* access to the eeprom */
+	struct nvmem_cell	*nvmem_cell;
+};
+
+struct capemgr_info {
+	struct platform_device	*pdev;
+
+	atomic_t next_slot_nr;
+	struct list_head	slot_list;
+	struct mutex		slots_list_mutex;
+
+	/* baseboard EEPROM data */
+	struct bone_baseboard	baseboard;
+
+	/* wait queue for keeping the priorities straight */
+	wait_queue_head_t	load_wq;
+};
+
+static int bone_slot_fill_override(struct bone_cape_slot *slot,
+		const char *part_number, const char *version);
+static struct bone_cape_slot *capemgr_add_slot(
+		struct capemgr_info *info, const char *slot_name,
+		const char *part_number, const char *version, int prio);
+static int capemgr_remove_slot_no_lock(struct bone_cape_slot *slot);
+static int capemgr_remove_slot(struct bone_cape_slot *slot);
+static int capemgr_load_slot(struct bone_cape_slot *slot);
+static int capemgr_unload_slot(struct bone_cape_slot *slot);
+
+/* baseboard EEPROM field definition */
+#define BBRD_EE_FIELD_HEADER		0
+#define BBRD_EE_FIELD_BOARD_NAME	1
+#define BBRD_EE_FIELD_REVISION		2
+#define BBRD_EE_FIELD_SERIAL_NUMBER	3
+#define BBRD_EE_FIELD_CONFIG_OPTION	4
+#define BBRD_EE_FILED_RSVD1		5
+#define BBRD_EE_FILED_RSVD2		6
+#define BBRD_EE_FILED_RSVD3		7
+
+/* cape EEPROM field definitions */
+#define CAPE_EE_FIELD_HEADER		0
+#define CAPE_EE_FIELD_EEPROM_REV	1
+#define CAPE_EE_FIELD_BOARD_NAME	2
+#define CAPE_EE_FIELD_VERSION		3
+#define CAPE_EE_FIELD_MANUFACTURER	4
+#define CAPE_EE_FIELD_PART_NUMBER	5
+#define CAPE_EE_FIELD_NUMBER_OF_PINS	6
+#define CAPE_EE_FIELD_SERIAL_NUMBER	7
+#define CAPE_EE_FIELD_PIN_USAGE		8
+#define CAPE_EE_FIELD_VDD_3V3EXP	9
+#define CAPE_EE_FIELD_VDD_5V		10
+#define CAPE_EE_FIELD_SYS_5V		11
+#define CAPE_EE_FIELD_DC_SUPPLIED	12
+#define CAPE_EE_FIELD_FIELDS_NR		13
+
+#define EE_FIELD_MAKE_HEADER(p)	\
+	({ \
+		const u8 *_p = (p); \
+		(((u32)_p[0] << 24) | ((u32)_p[1] << 16) | \
+		 ((u32)_p[2] <<  8) |  (u32)_p[3]); \
+	})
+
+#define EE_FIELD_HEADER_VALID	0xaa5533ee
+
+struct ee_field {
+	const char	*name;
+	int		start;
+	int		size;
+	unsigned int	ascii : 1;
+	unsigned int	strip_trailing_dots : 1;
+	const char	*override;
+};
+
+/* baseboard EEPROM definitions */
+static const struct ee_field bbrd_sig_fields[] = {
+	[BBRD_EE_FIELD_HEADER] = {
+		.name		= "header",
+		.start		= 0,
+		.size		= 4,
+		.ascii		= 0,
+		.override	= "\xaa\x55\x33\xee",	/* AA 55 33 EE */
+	},
+	[BBRD_EE_FIELD_BOARD_NAME] = {
+		.name		= "board-name",
+		.start		= 4,
+		.size		= 8,
+		.ascii		= 1,
+		.strip_trailing_dots = 1,
+		.override	= "Board Name",
+	},
+	[BBRD_EE_FIELD_REVISION] = {
+		.name		= "revision",
+		.start		= 12,
+		.size		= 4,
+		.ascii		= 1,
+		.override	= "00A0",
+	},
+	[BBRD_EE_FIELD_SERIAL_NUMBER] = {
+		.name		= "serial-number",
+		.start		= 16,
+		.size		= 12,
+		.ascii		= 1,
+		.override	= "0000000000",
+	},
+	[BBRD_EE_FIELD_CONFIG_OPTION] = {
+		.name		= "config-option",
+		.start		= 28,
+		.size		= 32,
+	},
+};
+
+/* cape EEPROM definitions */
+static const struct ee_field cape_sig_fields[] = {
+	[CAPE_EE_FIELD_HEADER] = {
+		.name		= "header",
+		.start		= 0,
+		.size		= 4,
+		.ascii		= 0,
+		.override	= "\xaa\x55\x33\xee",	/* AA 55 33 EE */
+	},
+	[CAPE_EE_FIELD_EEPROM_REV] = {
+		.name		= "eeprom-format-revision",
+		.start		= 4,
+		.size		= 2,
+		.ascii		= 1,
+		.override	= "A0",
+	},
+	[CAPE_EE_FIELD_BOARD_NAME] = {
+		.name		= "board-name",
+		.start		= 6,
+		.size		= 32,
+		.ascii		= 1,
+		.strip_trailing_dots = 1,
+		.override	= "Override Board Name",
+	},
+	[CAPE_EE_FIELD_VERSION] = {
+		.name		= "version",
+		.start		= 38,
+		.size		= 4,
+		.ascii		= 1,
+		.override	= "00A0",
+	},
+	[CAPE_EE_FIELD_MANUFACTURER] = {
+		.name		= "manufacturer",
+		.start		= 42,
+		.size		= 16,
+		.ascii		= 1,
+		.strip_trailing_dots = 1,
+		.override	= "Override Manuf",
+	},
+	[CAPE_EE_FIELD_PART_NUMBER] = {
+		.name		= "part-number",
+		.start		= 58,
+		.size		= 16,
+		.ascii		= 1,
+		.strip_trailing_dots = 1,
+		.override	= "Override Part#",
+	},
+	[CAPE_EE_FIELD_NUMBER_OF_PINS] = {
+		.name		= "number-of-pins",
+		.start		= 74,
+		.size		= 2,
+		.ascii		= 0,
+		.override	= NULL,
+	},
+	[CAPE_EE_FIELD_SERIAL_NUMBER] = {
+		.name		= "serial-number",
+		.start		= 76,
+		.size		= 12,
+		.ascii		= 1,
+		.override	= "0000000000",
+	},
+	[CAPE_EE_FIELD_PIN_USAGE] = {
+		.name		= "pin-usage",
+		.start		= 88,
+		.size		= 140,
+		.ascii		= 0,
+		.override	= NULL,
+	},
+	[CAPE_EE_FIELD_VDD_3V3EXP] = {
+		.name		= "vdd-3v3exp",
+		.start		= 228,
+		.size		= 2,
+		.ascii		= 0,
+		.override	= NULL,
+	},
+	[CAPE_EE_FIELD_VDD_5V] = {
+		.name		= "vdd-5v",
+		.start		= 230,
+		.size		= 2,
+		.ascii		= 0,
+		.override	= NULL,
+	},
+	[CAPE_EE_FIELD_SYS_5V] = {
+		.name		= "sys-5v",
+		.start		= 232,
+		.size		= 2,
+		.ascii		= 0,
+		.override	= NULL,
+	},
+	[CAPE_EE_FIELD_DC_SUPPLIED] = {
+		.name		= "dc-supplied",
+		.start		= 234,
+		.size		= 2,
+		.ascii		= 0,
+		.override	= NULL,
+	},
+};
+
+static char *ee_field_get(const struct ee_field *sig_field,
+		const void *data, int field, char *buf, int bufsz)
+{
+	int len;
+
+	/* enough space? */
+	if (bufsz < sig_field->size + sig_field->ascii)
+		return NULL;
+
+	memcpy(buf, (char *)data + sig_field->start, sig_field->size);
+
+	/* terminate ascii field */
+	if (sig_field->ascii)
+		buf[sig_field->size] = '\0';
+
+	if (sig_field->strip_trailing_dots) {
+		len = strlen(buf);
+		while (len > 1 && buf[len - 1] == '.')
+			buf[--len] = '\0';
+	}
+
+	return buf;
+}
+
+char *bbrd_ee_field_get(const void *data,
+		int field, char *buf, int bufsz)
+{
+	if ((unsigned int)field >= ARRAY_SIZE(bbrd_sig_fields))
+		return NULL;
+
+	return ee_field_get(&bbrd_sig_fields[field], data, field, buf, bufsz);
+}
+
+char *cape_ee_field_get(const void *data,
+		int field, char *buf, int bufsz)
+{
+	if ((unsigned int)field >= ARRAY_SIZE(cape_sig_fields))
+		return NULL;
+
+	return ee_field_get(&cape_sig_fields[field], data, field, buf, bufsz);
+}
+
+#ifdef CONFIG_OF
+static const struct of_device_id capemgr_of_match[] = {
+	{
+		.compatible = "ti,bone-capemgr",
+	},
+	{ },
+};
+MODULE_DEVICE_TABLE(of, capemgr_of_match);
+
+#endif
+
+static int bone_baseboard_scan(struct bone_baseboard *bbrd)
+{
+	struct capemgr_info *info = container_of(bbrd,
+			struct capemgr_info, baseboard);
+	const u8 *p;
+	int ret;
+	size_t len;
+
+	p = nvmem_cell_read(bbrd->nvmem_cell, &len);
+	if (IS_ERR(p)) {
+		ret = PTR_ERR(p);
+		dev_err(&info->pdev->dev,
+			"Cannot read cell (ret=%d)\n", ret);
+		return ret;
+	}
+	if (len < sizeof(bbrd->signature)) {
+		dev_info(&info->pdev->dev,
+			"Short read %d (should be >= %d bytes)\n",
+			len, sizeof(bbrd->signature));
+		return -EINVAL;
+	}
+	memcpy(bbrd->signature, p, sizeof(bbrd->signature));
+
+	p = bbrd->signature;
+	if (EE_FIELD_MAKE_HEADER(p) != EE_FIELD_HEADER_VALID) {
+		dev_err(&info->pdev->dev, "Invalid board signature '%08x'\n",
+			EE_FIELD_MAKE_HEADER(p));
+		return -ENODEV;
+	}
+
+	bbrd_ee_field_get(bbrd->signature,
+			BBRD_EE_FIELD_BOARD_NAME,
+			bbrd->board_name, sizeof(bbrd->board_name));
+	bbrd_ee_field_get(bbrd->signature,
+			BBRD_EE_FIELD_REVISION,
+			bbrd->revision, sizeof(bbrd->revision));
+	bbrd_ee_field_get(bbrd->signature,
+			BBRD_EE_FIELD_SERIAL_NUMBER,
+			bbrd->serial_number, sizeof(bbrd->serial_number));
+
+	/* board_name,version,manufacturer,part_number */
+	snprintf(bbrd->text_id, sizeof(bbrd->text_id) - 1,
+			"%s,%s,%s", bbrd->board_name, bbrd->revision,
+			bbrd->serial_number);
+
+	/* terminate always */
+	bbrd->text_id[sizeof(bbrd->text_id) - 1] = '\0';
+
+	return 0;
+}
+
+static int bone_slot_scan(struct bone_cape_slot *slot)
+{
+	struct capemgr_info *info = slot->info;
+	const u8 *p;
+	int r;
+	ssize_t len;
+
+	/* need to read EEPROM? */
+	if (slot->probed)
+		goto slot_fail_check;
+
+	if (uboot_capemgr_enabled)
+		goto slot_fail_check;
+
+	slot->probed = 1;
+
+	if (!slot->override) {
+
+		p = nvmem_cell_read(slot->nvmem_cell, &len);
+		if (IS_ERR(p)) {
+			r = PTR_ERR(p);
+			slot->probe_failed = 1;
+
+			/* timeout is normal when no cape is present */
+			if (r != -ETIMEDOUT)
+				dev_err(&info->pdev->dev,
+					"Cannot read cell (ret=%d)\n", r);
+			return r;
+		}
+		if (len < sizeof(slot->signature)) {
+			dev_info(&info->pdev->dev,
+				"Short read %d (should be >= %d bytes)\n",
+				len, sizeof(slot->signature));
+			return -EINVAL;
+		}
+		memcpy(slot->signature, p, sizeof(slot->signature));
+
+	} else
+		dev_info(&info->pdev->dev,
+			"Using override eeprom data at slot %d\n",
+			slot->slotno);
+
+	p = slot->signature;
+	if (EE_FIELD_MAKE_HEADER(p) != EE_FIELD_HEADER_VALID) {
+		dev_err(&info->pdev->dev,
+			"Invalid signature '%08x' at slot %d\n",
+			EE_FIELD_MAKE_HEADER(p), slot->slotno);
+		slot->probe_failed = 1;
+		return -ENODEV;
+	}
+
+	cape_ee_field_get(slot->signature,
+			CAPE_EE_FIELD_BOARD_NAME,
+			slot->board_name, sizeof(slot->board_name));
+	cape_ee_field_get(slot->signature,
+			CAPE_EE_FIELD_VERSION,
+			slot->version, sizeof(slot->version));
+	cape_ee_field_get(slot->signature,
+			CAPE_EE_FIELD_MANUFACTURER,
+			slot->manufacturer, sizeof(slot->manufacturer));
+	cape_ee_field_get(slot->signature,
+			CAPE_EE_FIELD_PART_NUMBER,
+			slot->part_number, sizeof(slot->part_number));
+
+	/* board_name,version,manufacturer,part_number */
+	snprintf(slot->text_id, sizeof(slot->text_id) - 1,
+			"%s,%s,%s,%s", slot->board_name, slot->version,
+			slot->manufacturer, slot->part_number);
+
+	/* terminate always */
+	slot->text_id[sizeof(slot->text_id) - 1] = '\0';
+
+slot_fail_check:
+	/* slot has failed and we don't support hotpluging */
+	if (slot->probe_failed)
+		return -ENODEV;
+
+	return 0;
+}
+
+/* return 0 if not matched,, 1 if matched */
+static int bone_match_cape(const char *match,
+		const char *part_number, const char *version)
+{
+	char *tmp_part_number, *tmp_version;
+	char *buf, *s, *e, *sn;
+	int found;
+
+	if (match == NULL || part_number == NULL)
+		return 0;
+
+	/* copy the argument to work on it */
+	buf = kstrdup(match, GFP_KERNEL);
+
+	/* no memory, too bad... */
+	if (buf == NULL)
+		return 0;
+
+	found = 0;
+	s = buf;
+	e = s + strlen(s);
+	while (s < e) {
+		/* find comma separator */
+		sn = strchr(s, ',');
+		if (sn != NULL)
+			*sn++ = '\0';
+		else
+			sn = e;
+		tmp_part_number = s;
+		tmp_version = strchr(tmp_part_number, ':');
+		if (tmp_version != NULL)
+			*tmp_version++ = '\0';
+		s = sn;
+
+		/* the part names must match */
+		if (strcmp(tmp_part_number, part_number) != 0)
+			continue;
+
+		/* if there's no version, match any */
+		if (version == NULL || tmp_version == NULL ||
+			strcmp(version, tmp_version) == 0) {
+			found = 1;
+			break;
+		}
+	}
+
+	kfree(buf);
+
+	return found;
+}
+
+/* helper method */
+static int of_multi_prop_cmp(const struct property *prop, const char *value)
+{
+	const char *cp;
+	int cplen, vlen, l;
+
+	/* check if it's directly compatible */
+	cp = prop->value;
+	cplen = prop->length;
+	vlen = strlen(value);
+
+	while (cplen > 0) {
+		/* compatible? */
+		if (of_compat_cmp(cp, value, vlen) == 0)
+			return 0;
+		l = strlen(cp) + 1;
+		cp += l;
+		cplen -= l;
+	}
+	return -1;
+}
+
+static ssize_t slot_ee_attr_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct slot_ee_attribute *ee_attr = to_slot_ee_attribute(attr);
+	struct bone_cape_slot *slot = ee_attr->slot;
+	const struct ee_field *sig_field;
+	int i, len;
+	char *p, *s;
+	u16 val;
+
+	/* add newline for ascii fields */
+	sig_field = &cape_sig_fields[ee_attr->field];
+
+	len = sig_field->size + sig_field->ascii;
+	p = kmalloc(len, GFP_KERNEL);
+	if (p == NULL)
+		return -ENOMEM;
+
+	s = cape_ee_field_get(slot->signature, ee_attr->field, p, len);
+	if (s == NULL)
+		return -EINVAL;
+
+	/* add newline for ascii fields and return */
+	if (sig_field->ascii) {
+		len = sprintf(buf, "%s\n", s);
+		goto out;
+	}
+
+	/* case by case handling */
+	switch (ee_attr->field) {
+	case CAPE_EE_FIELD_HEADER:
+		len = sprintf(buf, "%02x %02x %02x %02x\n",
+				s[0], s[1], s[2], s[3]);
+		break;
+
+		/* 2 bytes */
+	case CAPE_EE_FIELD_NUMBER_OF_PINS:
+	case CAPE_EE_FIELD_VDD_3V3EXP:
+	case CAPE_EE_FIELD_VDD_5V:
+	case CAPE_EE_FIELD_SYS_5V:
+	case CAPE_EE_FIELD_DC_SUPPLIED:
+		/* the bone is LE */
+		val = s[0] & (s[1] << 8);
+		len = sprintf(buf, "%u\n", (unsigned int)val & 0xffff);
+		break;
+
+	case CAPE_EE_FIELD_PIN_USAGE:
+
+		len = 0;
+		for (i = 0; i < sig_field->size / 2; i++) {
+			/* the bone is LE */
+			val = s[0] & (s[1] << 8);
+			sprintf(buf, "%04x\n", val);
+			buf += 5;
+			len += 5;
+			s += 2;
+		}
+
+		break;
+
+	default:
+		*buf = '\0';
+		len = 0;
+		break;
+	}
+
+out:
+	kfree(p);
+
+	return len;
+}
+
+#define SLOT_EE_ATTR(_name, _field) \
+	{ \
+		.devattr = __ATTR(_name, S_IRUGO, slot_ee_attr_show, NULL), \
+		.field = CAPE_EE_FIELD_##_field, \
+		.slot = NULL, \
+	}
+
+static const struct slot_ee_attribute slot_ee_attrs[] = {
+	SLOT_EE_ATTR(header, HEADER),
+	SLOT_EE_ATTR(eeprom-format-revision, EEPROM_REV),
+	SLOT_EE_ATTR(board-name, BOARD_NAME),
+	SLOT_EE_ATTR(version, VERSION),
+	SLOT_EE_ATTR(manufacturer, MANUFACTURER),
+	SLOT_EE_ATTR(part-number, PART_NUMBER),
+	SLOT_EE_ATTR(number-of-pins, NUMBER_OF_PINS),
+	SLOT_EE_ATTR(serial-number, SERIAL_NUMBER),
+	SLOT_EE_ATTR(pin-usage, PIN_USAGE),
+	SLOT_EE_ATTR(vdd-3v3exp, VDD_3V3EXP),
+	SLOT_EE_ATTR(vdd-5v, VDD_5V),
+	SLOT_EE_ATTR(sys-5v, SYS_5V),
+	SLOT_EE_ATTR(dc-supplied, DC_SUPPLIED),
+};
+
+static int bone_cape_slot_sysfs_register(struct bone_cape_slot *slot)
+{
+	struct capemgr_info *info = slot->info;
+	struct device *dev = &info->pdev->dev;
+	struct slot_ee_attribute *ee_attr;
+	struct attribute_group *attrgroup;
+	int i, err, sz;
+
+	slot->ee_attr_name = kasprintf(GFP_KERNEL, "slot-%d", slot->slotno);
+	if (slot->ee_attr_name == NULL) {
+		dev_err(dev, "slot #%d: Failed to allocate ee_attr_name\n",
+				slot->slotno);
+		err = -ENOMEM;
+		goto err_fail_no_ee_attr_name;
+	}
+
+	slot->ee_attrs_count = ARRAY_SIZE(slot_ee_attrs);
+
+	sz = slot->ee_attrs_count * sizeof(*slot->ee_attrs);
+	slot->ee_attrs = kmalloc(sz, GFP_KERNEL);
+	if (slot->ee_attrs == NULL) {
+		dev_err(dev, "slot #%d: Failed to allocate ee_attrs\n",
+				slot->slotno);
+		err = -ENOMEM;
+		goto err_fail_no_ee_attrs;
+	}
+
+	attrgroup = &slot->attrgroup;
+	memset(attrgroup, 0, sizeof(*attrgroup));
+	attrgroup->name = slot->ee_attr_name;
+
+	sz = sizeof(*slot->ee_attrs_tab) * (slot->ee_attrs_count + 1);
+	attrgroup->attrs = kmalloc(sz, GFP_KERNEL);
+	if (attrgroup->attrs == NULL) {
+		dev_err(dev, "slot #%d: Failed to allocate ee_attrs_tab\n",
+				slot->slotno);
+		err = -ENOMEM;
+		goto err_fail_no_ee_attrs_tab;
+	}
+	/* copy everything over */
+	memcpy(slot->ee_attrs, slot_ee_attrs, sizeof(slot_ee_attrs));
+
+	/* bind this attr to the slot */
+	for (i = 0; i < slot->ee_attrs_count; i++) {
+		ee_attr = &slot->ee_attrs[i];
+		ee_attr->slot = slot;
+		attrgroup->attrs[i] = &ee_attr->devattr.attr;
+	}
+	attrgroup->attrs[i] = NULL;
+
+	/* make lockdep happy */
+	for (i = 0; i < slot->ee_attrs_count; i++) {
+		ee_attr = &slot->ee_attrs[i];
+		sysfs_attr_init(&ee_attr->devattr.attr);
+	}
+
+	err = sysfs_create_group(&dev->kobj, attrgroup);
+	if (err != 0) {
+		dev_err(dev, "slot #%d: Failed to allocate ee_attrs_tab\n",
+				slot->slotno);
+		err = -ENOMEM;
+		goto err_fail_no_ee_attrs_group;
+	}
+
+	return 0;
+
+err_fail_no_ee_attrs_group:
+	kfree(slot->ee_attrs_tab);
+err_fail_no_ee_attrs_tab:
+	kfree(slot->ee_attrs);
+err_fail_no_ee_attrs:
+	kfree(slot->ee_attr_name);
+err_fail_no_ee_attr_name:
+	return err;
+}
+
+static void bone_cape_slot_sysfs_unregister(struct bone_cape_slot *slot)
+{
+	struct capemgr_info *info = slot->info;
+	struct device *dev = &info->pdev->dev;
+
+	sysfs_remove_group(&dev->kobj, &slot->attrgroup);
+	kfree(slot->ee_attrs_tab);
+	kfree(slot->ee_attrs);
+	kfree(slot->ee_attr_name);
+}
+
+static ssize_t bbrd_ee_attr_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct bbrd_ee_attribute *ee_attr = to_bbrd_ee_attribute(attr);
+	struct platform_device *pdev = to_platform_device(dev);
+	struct capemgr_info *info = platform_get_drvdata(pdev);
+	struct bone_baseboard *bbrd = &info->baseboard;
+	const struct ee_field *sig_field;
+	u16 val;
+	int i, len;
+	char *p, *s;
+
+	/* add newline for ascii fields */
+	sig_field = &bbrd_sig_fields[ee_attr->field];
+
+	len = sig_field->size + sig_field->ascii;
+	p = kmalloc(len, GFP_KERNEL);
+	if (p == NULL)
+		return -ENOMEM;
+
+	s = bbrd_ee_field_get(bbrd->signature, ee_attr->field, p, len);
+	if (s == NULL)
+		return -EINVAL;
+
+	/* add newline for ascii fields and return */
+	if (sig_field->ascii) {
+		len = sprintf(buf, "%s\n", s);
+		goto out;
+	}
+
+	/* case by case handling */
+	switch (ee_attr->field) {
+	case BBRD_EE_FIELD_HEADER:
+		len = sprintf(buf, "%02x %02x %02x %02x\n",
+				s[0], s[1], s[2], s[3]);
+		break;
+
+	case BBRD_EE_FIELD_CONFIG_OPTION:
+		len = 0;
+		for (i = 0; i < sig_field->size / 2; i++) {
+			/* the bone is LE */
+			val = s[0] & (s[1] << 8);
+			sprintf(buf, "%04x\n", val);
+			buf += 5;
+			len += 5;
+			s += 2;
+		}
+		break;
+
+	default:
+		*buf = '\0';
+		len = 0;
+		break;
+	}
+
+out:
+	kfree(p);
+
+	return len;
+}
+
+#define BBRD_EE_ATTR(_name, _field) \
+	{ \
+		.devattr = __ATTR(_name, 0440, bbrd_ee_attr_show, NULL), \
+		.field = BBRD_EE_FIELD_##_field, \
+	}
+
+static struct bbrd_ee_attribute bbrd_ee_attrs[] = {
+	BBRD_EE_ATTR(header, HEADER),
+	BBRD_EE_ATTR(board-name, BOARD_NAME),
+	BBRD_EE_ATTR(revision, REVISION),
+	BBRD_EE_ATTR(serial-number, SERIAL_NUMBER),
+	BBRD_EE_ATTR(config-option, CONFIG_OPTION),
+};
+
+static struct attribute *bbrd_attrs_flat[] = {
+	&bbrd_ee_attrs[BBRD_EE_FIELD_HEADER].devattr.attr,
+	&bbrd_ee_attrs[BBRD_EE_FIELD_BOARD_NAME].devattr.attr,
+	&bbrd_ee_attrs[BBRD_EE_FIELD_REVISION].devattr.attr,
+	&bbrd_ee_attrs[BBRD_EE_FIELD_SERIAL_NUMBER].devattr.attr,
+	&bbrd_ee_attrs[BBRD_EE_FIELD_CONFIG_OPTION].devattr.attr,
+	NULL,
+};
+
+static const struct attribute_group bbrd_attr_group = {
+	.name	= "baseboard",
+	.attrs	= bbrd_attrs_flat,
+};
+
+static ssize_t slots_show(struct device *dev, struct device_attribute *attr,
+		char *buf);
+static ssize_t slots_store(struct device *dev, struct device_attribute *attr,
+		 const char *buf, size_t count);
+
+static DEVICE_ATTR(slots, 0644, slots_show, slots_store);
+
+static struct attribute *root_attrs_flat[] = {
+	&dev_attr_slots.attr,
+	NULL,
+};
+
+static const struct attribute_group root_attr_group = {
+	.attrs = root_attrs_flat,
+};
+
+static const struct attribute_group *attr_groups[] = {
+	&root_attr_group,
+	&bbrd_attr_group,
+	NULL,
+};
+
+static ssize_t slots_show(struct device *dev, struct device_attribute *attr,
+		char *buf)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct capemgr_info *info = platform_get_drvdata(pdev);
+	struct bone_cape_slot *slot;
+	ssize_t len, sz;
+
+	mutex_lock(&info->slots_list_mutex);
+	sz = 0;
+	list_for_each_entry(slot, &info->slot_list, node) {
+
+		len = sprintf(buf, "%2d: %c%c%c%c%c%c %3d %s\n",
+				slot->slotno,
+				slot->probed       ? 'P' : '-',
+				slot->probe_failed ? 'F' : '-',
+				slot->override     ? 'O' : '-',
+				slot->loading	   ? 'l' : '-',
+				slot->loaded	   ? 'L' : '-',
+				slot->disabled     ? 'D' : '-',
+				slot->overlay_id, slot->text_id);
+
+		buf += len;
+		sz += len;
+	}
+	mutex_unlock(&info->slots_list_mutex);
+
+	return sz;
+}
+
+static ssize_t slots_store(struct device *dev, struct device_attribute *attr,
+		 const char *buf, size_t count)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct capemgr_info *info = platform_get_drvdata(pdev);
+	struct bone_cape_slot *slot;
+	struct device_node *pnode, *node;
+	char *s, *part_number, *version;
+	int ret;
+	int slotno;
+
+	/* check for remove slot */
+	if (strlen(buf) > 0 && buf[0] == '-') {
+		ret = kstrtoint(buf + 1, 10, &slotno);
+		if (ret != 0)
+			return ret;
+
+		/* now load each (take lock to be sure */
+		mutex_lock(&info->slots_list_mutex);
+		list_for_each_entry(slot, &info->slot_list, node) {
+			if (slotno == slot->slotno)
+				goto found;
+		}
+
+		mutex_unlock(&info->slots_list_mutex);
+		return -ENODEV;
+found:
+		/* the hardware slots just get unloaded */
+		if (!slot->override) {
+			ret = capemgr_unload_slot(slot);
+			if (ret == 0)
+				dev_info(&pdev->dev,
+					"Unloaded slot #%d\n", slotno);
+			else
+				dev_err(&pdev->dev,
+					"Failed to unload slot #%d\n", slotno);
+		} else {
+			ret = capemgr_remove_slot_no_lock(slot);
+			if (ret == 0)
+				dev_info(&pdev->dev,
+					"Removed slot #%d\n", slotno);
+			else
+				dev_err(&pdev->dev,
+					"Failed to remove slot #%d\n", slotno);
+		}
+		mutex_unlock(&info->slots_list_mutex);
+
+		return ret == 0 ? strlen(buf) : ret;
+	}
+
+	part_number = kstrdup(buf, GFP_KERNEL);
+	if (part_number == NULL)
+		return -ENOMEM;
+
+	/* remove trailing spaces dots and newlines */
+	s = part_number + strlen(part_number);
+	while (s > part_number &&
+			(isspace(s[-1]) || s[-1] == '\n' || s[-1] == '.'))
+		*--s = '\0';
+
+	version = strchr(part_number, ':');
+	if (version != NULL)
+		*version++ = '\0';
+
+	dev_info(&pdev->dev, "part_number '%s', version '%s'\n",
+			part_number, version ? version : "N/A");
+
+	pnode = pdev->dev.of_node;
+	node = NULL;
+	slot = NULL;
+	ret = 0;
+
+	/* no specific slot found, try immediate */
+	slot = capemgr_add_slot(info, NULL, part_number, version, 0);
+
+	if (IS_ERR_OR_NULL(slot)) {
+		dev_err(&pdev->dev, "Failed to add slot #%d\n",
+			atomic_read(&info->next_slot_nr) - 1);
+		ret = slot ? PTR_ERR(slot) : -ENODEV;
+		slot = NULL;
+		goto err_fail;
+	}
+
+	kfree(part_number);
+
+	ret = capemgr_load_slot(slot);
+	if (ret != 0)
+		capemgr_remove_slot(slot);
+
+	return ret == 0 ? strlen(buf) : ret;
+err_fail:
+	of_node_put(node);
+	kfree(part_number);
+	return ret;
+}
+
+/* verify the overlay */
+static int capemgr_verify_overlay(struct bone_cape_slot *slot)
+{
+	struct capemgr_info *info = slot->info;
+	struct device *dev = &info->pdev->dev;
+	struct bone_baseboard *bbrd = &info->baseboard;
+	struct device_node *node = slot->overlay;
+	struct property *prop;
+	struct bone_cape_slot *slotn;
+	int err, counta, countb, i, j;
+	const char *ra, *rb;
+
+	/* validate */
+	if (node == NULL) {
+		dev_err(dev, "slot #%d: No overlay for '%s'\n",
+				slot->slotno, slot->part_number);
+		return -EINVAL;
+	}
+
+	/* check if the slot is compatible with the board */
+	prop = of_find_property(node, "compatible", NULL);
+
+	/* no compatible property? */
+	if (prop == NULL) {
+		dev_err(dev, "slot #%d: No compatible property for '%s'\n",
+				slot->slotno, slot->part_number);
+		return -EINVAL;
+	}
+
+	/* verify that the cape is baseboard compatible */
+	if (of_multi_prop_cmp(prop, bbrd->compatible_name) != 0) {
+		dev_err(dev, "slot #%d: Incompatible with baseboard for '%s'\n",
+				slot->slotno, slot->part_number);
+		return -EINVAL;
+	}
+
+	/* count the strings */
+	counta = of_property_count_strings(node, "exclusive-use");
+	/* no valid property, or no resources; no matter, it's OK */
+	if (counta <= 0)
+		return 0;
+
+	/* and now check if there's a resource conflict */
+	err = 0;
+	mutex_lock(&info->slots_list_mutex);
+	for (i = 0; i < counta; i++) {
+
+		ra = NULL;
+		err = of_property_read_string_index(node, "exclusive-use",
+				i, &ra);
+		if (err != 0) {
+			dev_err(dev, "slot #%d: Could not read string #%d\n",
+					slot->slotno, i);
+			break;
+		}
+
+		list_for_each_entry(slotn, &info->slot_list, node) {
+
+			/* don't check against self */
+			if (slot == slotn)
+				continue;
+
+			/* only check against loaded or loading slots */
+			if (!slotn->loaded && !slotn->loading)
+				continue;
+
+			countb = of_property_count_strings(slotn->overlay,
+					"exclusive-use");
+			/* no valid property, or resources; it's OK */
+			if (countb <= 0)
+				continue;
+
+
+			for (j = 0; j < countb; j++) {
+
+				/* count the resources */
+				rb = NULL;
+				err = of_property_read_string_index(
+					slotn->overlay, "exclusive-use",
+						j, &rb);
+				if (err != 0) {
+					/* error, but we don't care */
+					err = 0;
+					break;
+				}
+
+				/* ignore case; just in case ;) */
+				if (strcasecmp(ra, rb) == 0) {
+
+					/* resource conflict */
+					err = -EEXIST;
+					dev_err(dev,
+						"slot #%d: %s conflict %s (#%d:%s)\n",
+						slot->slotno,
+						slot->part_number, ra,
+						slotn->slotno,
+						slotn->part_number);
+					goto out;
+				}
+			}
+		}
+	}
+out:
+	mutex_unlock(&info->slots_list_mutex);
+
+	return err;
+}
+
+static int capemgr_load_slot(struct bone_cape_slot *slot)
+{
+	struct capemgr_info *info = slot->info;
+	struct device *dev = &info->pdev->dev;
+	const char *dtbo;
+	int err;
+
+	if (slot->probe_failed) {
+		dev_err(dev, "slot #%d: probe failed for '%s'\n",
+			slot->slotno, slot->part_number);
+		return -ENODEV;
+	}
+
+	if (slot->loaded) {
+		dev_err(dev, "slot #%d: already loaded for '%s'\n",
+			slot->slotno, slot->part_number);
+		return -EAGAIN;
+	}
+
+	/* make sure we don't leak this on repeated calls */
+	kfree(slot->dtbo);
+	slot->dtbo = NULL;
+
+	dev_dbg(dev, "slot #%d: Requesting part number/version based '%s-%s.dtbo\n",
+			slot->slotno, slot->part_number, slot->version);
+
+	/* request the part number + .dtbo*/
+	slot->dtbo = kasprintf(GFP_KERNEL, "%s-%s.dtbo",
+			slot->part_number, slot->version);
+	if (slot->dtbo == NULL) {
+		dev_err(dev, "slot #%d: Failed to get dtbo '%s'\n",
+				slot->slotno, dtbo);
+		return -ENOMEM;
+	}
+
+	dev_dbg(dev, "slot #%d: Requesting firmware '%s' for board-name '%s', version '%s'%s\n",
+			slot->slotno,
+			slot->dtbo, slot->board_name, slot->version,
+			system_state == SYSTEM_BOOTING ? " - booting" : "");
+
+	err = request_firmware_direct(&slot->fw, slot->dtbo, dev);
+	if (err != 0) {
+		dev_dbg(dev, "failed to load firmware '%s'\n", slot->dtbo);
+		goto err_fail_no_fw;
+	}
+
+	dev_dbg(dev, "slot #%d: dtbo '%s' loaded; converting to live tree\n",
+			slot->slotno, slot->dtbo);
+
+	of_fdt_unflatten_tree((unsigned long *)slot->fw->data, NULL,
+			&slot->overlay);
+	if (slot->overlay == NULL) {
+		dev_err(dev, "slot #%d: Failed to unflatten\n",
+				slot->slotno);
+		err = -EINVAL;
+		goto err_fail;
+	}
+
+	/* mark it as detached */
+	of_node_set_flag(slot->overlay, OF_DETACHED);
+
+	/* perform resolution */
+	err = of_resolve_phandles(slot->overlay);
+	if (err != 0) {
+		dev_err(dev, "slot #%d: Failed to resolve tree\n",
+				slot->slotno);
+		goto err_fail;
+	}
+
+	err = capemgr_verify_overlay(slot);
+	if (err != 0) {
+		dev_err(dev, "slot #%d: Failed verification\n",
+				slot->slotno);
+		goto err_fail;
+	}
+
+	err = of_overlay_create(slot->overlay);
+	if (err < 0) {
+		dev_err(dev, "slot #%d: Failed to create overlay\n",
+				slot->slotno);
+		goto err_fail;
+	}
+	slot->overlay_id = err;
+
+	slot->loading = 0;
+	slot->loaded = 1;
+
+	dev_info(dev, "slot #%d: dtbo '%s' loaded; overlay id #%d\n",
+			slot->slotno, slot->dtbo, slot->overlay_id);
+
+	return 0;
+
+err_fail:
+
+	/* TODO: free the overlay, we can't right now cause
+	 * the unflatten method does not track it */
+	slot->overlay = NULL;
+
+	release_firmware(slot->fw);
+	slot->fw = NULL;
+
+err_fail_no_fw:
+	slot->loading = 0;
+	return err;
+}
+
+static int capemgr_unload_slot(struct bone_cape_slot *slot)
+{
+	if (!slot->loaded || slot->overlay_id == -1)
+		return -EINVAL;
+
+	of_overlay_destroy(slot->overlay_id);
+	slot->overlay_id = -1;
+
+	slot->loaded = 0;
+
+	return 0;
+
+}
+
+/* slots_list_mutex must be taken */
+static int capemgr_remove_slot_no_lock(struct bone_cape_slot *slot)
+{
+	struct capemgr_info *info = slot->info;
+	struct device *dev = &info->pdev->dev;
+	int ret;
+
+	if (slot == NULL)
+		return 0;
+
+	if (slot->loaded && slot->overlay_id >= 0) {
+		/* unload just in case */
+		ret = capemgr_unload_slot(slot);
+		if (ret != 0) {
+			dev_err(dev, "Unable to unload slot #%d\n",
+				slot->slotno);
+			return ret;
+		}
+	}
+
+	/* if probed OK, remove the sysfs nodes */
+	if (slot->probed && !slot->probe_failed)
+		bone_cape_slot_sysfs_unregister(slot);
+
+	/* remove it from the list */
+	list_del(&slot->node);
+
+	if (slot->nvmem_cell)
+		nvmem_cell_put(slot->nvmem_cell);
+	devm_kfree(dev, slot);
+	return 0;
+}
+
+static int capemgr_remove_slot(struct bone_cape_slot *slot)
+{
+	struct capemgr_info *info = slot->info;
+	int ret;
+
+	mutex_lock(&info->slots_list_mutex);
+	ret = capemgr_remove_slot_no_lock(slot);
+	mutex_unlock(&info->slots_list_mutex);
+
+	return ret;
+}
+
+static int bone_slot_fill_override(struct bone_cape_slot *slot,
+		const char *part_number, const char *version)
+{
+	const struct ee_field *sig_field;
+	int i, len, has_part_number;
+	char *p;
+
+	slot->probe_failed = 0;
+	slot->probed = 0;
+
+	/* zero out signature */
+	memset(slot->signature, 0,
+			sizeof(slot->signature));
+
+	/* first, fill in all with override defaults */
+	for (i = 0; i < ARRAY_SIZE(cape_sig_fields); i++) {
+
+		sig_field = &cape_sig_fields[i];
+
+		/* point to the entry */
+		p = slot->signature + sig_field->start;
+
+		if (sig_field->override)
+			memcpy(p, sig_field->override,
+					sig_field->size);
+		else
+			memset(p, 0, sig_field->size);
+	}
+
+	/* if a part_number is supplied use it */
+	len = part_number ? strlen(part_number) : 0;
+	if (len > 0) {
+		sig_field = &cape_sig_fields[CAPE_EE_FIELD_PART_NUMBER];
+
+		/* point to the entry */
+		p = slot->signature + sig_field->start;
+
+		/* copy and zero out any remainder */
+		if (len > sig_field->size)
+			len = sig_field->size;
+		memcpy(p, part_number, len);
+		if (len < sig_field->size)
+			memset(p + len, 0, sig_field->size - len);
+
+		has_part_number = 1;
+	}
+
+	/* if a version is supplied use it */
+	len = version ? strlen(version) : 0;
+	if (len > 0) {
+		sig_field = &cape_sig_fields[CAPE_EE_FIELD_VERSION];
+
+		/* point to the entry */
+		p = slot->signature + sig_field->start;
+
+		/* copy and zero out any remainder */
+		if (len > sig_field->size)
+			len = sig_field->size;
+		memcpy(p, version, len);
+		if (len < sig_field->size)
+			memset(p + len, 0, sig_field->size - len);
+	}
+
+	/* we must have a part number */
+	if (!has_part_number)
+		return -EINVAL;
+
+	slot->override = 1;
+
+	return 0;
+}
+
+static struct bone_cape_slot *
+capemgr_add_slot(struct capemgr_info *info, const char *slot_name,
+		const char *part_number, const char *version, int prio)
+{
+	struct bone_cape_slot *slot;
+	struct device *dev = &info->pdev->dev;
+	int slotno;
+	int ret;
+
+	slotno = atomic_inc_return(&info->next_slot_nr) - 1;
+
+	slot = devm_kzalloc(dev, sizeof(*slot), GFP_KERNEL);
+	if (slot == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	slot->info = info;
+	slot->slotno = slotno;
+	slot->priority = prio;
+	slot->overlay_id = -1;
+
+	if (slot_name) {
+		slot->nvmem_cell = nvmem_cell_get(dev, slot_name);
+		if (IS_ERR(slot->nvmem_cell)) {
+			ret = PTR_ERR(slot->nvmem_cell);
+			if (ret != -EPROBE_DEFER)
+				dev_err(dev, "Failed to get slot eeprom cell\n");
+			slot->nvmem_cell = NULL;
+			goto err_out;
+		}
+	} else {
+		dev_err(dev, "slot #%d: override\n", slotno);
+
+		/* fill in everything with defaults first */
+		ret = bone_slot_fill_override(slot, part_number, version);
+		if (ret != 0) {
+			dev_err(dev, "slot #%d: override failed\n", slotno);
+			goto err_out;
+		}
+	}
+
+	ret = bone_slot_scan(slot);
+	if (ret != 0) {
+
+		if (!slot->probe_failed) {
+			dev_err(dev, "slot #%d: scan failed\n",
+					slotno);
+			goto err_out;
+		}
+
+		dev_err(dev, "slot #%d: No cape found\n", slotno);
+		/* but all is fine */
+	} else {
+		if (uboot_capemgr_enabled == 0) {
+			dev_err(dev, "slot #%d: '%s'\n",
+					slotno, slot->text_id);
+
+			ret = bone_cape_slot_sysfs_register(slot);
+			if (ret != 0) {
+				dev_err(dev, "slot #%d: sysfs register failed\n",
+						slotno);
+				goto err_out;
+			}
+		} else {
+			dev_err(dev, "slot #%d: auto loading handled by U-Boot\n", slotno);
+		}
+	}
+
+	/* add to the slot list */
+	mutex_lock(&info->slots_list_mutex);
+	list_add_tail(&slot->node, &info->slot_list);
+	mutex_unlock(&info->slots_list_mutex);
+
+	return slot;
+
+err_out:
+	if (slot->nvmem_cell)
+		nvmem_cell_put(slot->nvmem_cell);
+	devm_kfree(dev, slot);
+	return ERR_PTR(ret);
+}
+
+/* return 1 if it makes sense to retry loading */
+static int retry_loading_condition(struct bone_cape_slot *slot)
+{
+	struct capemgr_info *info = slot->info;
+	struct device *dev = &info->pdev->dev;
+	struct bone_cape_slot *slotn;
+	int ret;
+
+	dev_dbg(dev, "loader: retry_loading slot-%d %s:%s (prio %d)\n",
+			slot->slotno, slot->part_number, slot->version,
+			slot->priority);
+
+	mutex_lock(&info->slots_list_mutex);
+	ret = 0;
+	list_for_each_entry(slotn, &info->slot_list, node) {
+		/* if same slot or not loading skip */
+		if (!slotn->loading || slotn->retry_loading)
+			continue;
+		/* at least one cape is still loading (without retrying) */
+		ret = 1;
+	}
+	mutex_unlock(&info->slots_list_mutex);
+	return ret;
+}
+
+/* return 1 if this slot is clear to try to load now */
+static int clear_to_load_condition(struct bone_cape_slot *slot)
+{
+	struct capemgr_info *info = slot->info;
+	int my_prio = slot->priority;
+	struct device *dev = &info->pdev->dev;
+	int ret;
+
+	dev_dbg(dev, "loader: check slot-%d %s:%s (prio %d)\n", slot->slotno,
+			slot->part_number, slot->version, slot->priority);
+
+	mutex_lock(&info->slots_list_mutex);
+	ret = 1;
+	list_for_each_entry(slot, &info->slot_list, node) {
+		/* if any slot is loading with lowest priority */
+		if (!slot->loading)
+			continue;
+		if (slot->priority < my_prio) {
+			ret = 0;
+			break;
+		}
+	}
+	mutex_unlock(&info->slots_list_mutex);
+	return ret;
+}
+
+static int capemgr_loader(void *data)
+{
+	struct bone_cape_slot *slot = data;
+	struct capemgr_info *info = slot->info;
+	struct device *dev = &info->pdev->dev;
+	int ret, done, other_loading, booting;
+
+	done = 0;
+
+	slot->retry_loading = 0;
+
+	dev_dbg(dev, "loader: before slot-%d %s:%s (prio %d)\n", slot->slotno,
+			slot->part_number, slot->version, slot->priority);
+
+	/*
+	 * We have a basic priority based arbitration system
+	 * Slots have priorities, so the lower priority ones
+	 * should start loading first. So each time we end up
+	 * here.
+	 */
+	ret = wait_event_interruptible(info->load_wq,
+			clear_to_load_condition(slot));
+	if (ret < 0) {
+		dev_warn(dev, "loader, Signal pending\n");
+		return ret;
+	}
+
+	dev_dbg(dev, "loader: after slot-%d %s:%s (prio %d)\n", slot->slotno,
+			slot->part_number, slot->version, slot->priority);
+
+	/* using the return value */
+	ret = capemgr_load_slot(slot);
+
+	/* wake up all just in case */
+	wake_up_interruptible_all(&info->load_wq);
+
+	if (ret == 0)
+		goto done;
+
+	dev_dbg(dev, "loader: retrying slot-%d %s:%s (prio %d)\n", slot->slotno,
+			slot->part_number, slot->version, slot->priority);
+
+	/* first attempt has failed; now try each time there's any change */
+	slot->retry_loading = 1;
+
+	for (;;) {
+		booting = (system_state == SYSTEM_BOOTING);
+		other_loading = retry_loading_condition(slot);
+		if (!booting && !other_loading)
+			break;
+
+		/* simple wait for someone to kick us */
+		if (other_loading) {
+			DEFINE_WAIT(__wait);
+
+			prepare_to_wait(&info->load_wq, &__wait,
+					TASK_INTERRUPTIBLE);
+			finish_wait(&info->load_wq, &__wait);
+		} else {
+			/* always delay when booting */
+			msleep(boot_scan_period);
+		}
+
+		if (signal_pending(current)) {
+			dev_warn(dev, "loader, Signal pending\n");
+			ret = -ERESTARTSYS;
+			goto done;
+		}
+
+		/* using the return value */
+		ret = capemgr_load_slot(slot);
+		if (ret == 0)
+			goto done;
+
+		/* wake up all just in case */
+		wake_up_interruptible_all(&info->load_wq);
+	}
+
+done:
+	slot->loading = 0;
+	slot->retry_loading = 0;
+
+	if (ret == 0) {
+		dev_dbg(dev, "loader: done slot-%d %s:%s (prio %d)\n",
+			slot->slotno, slot->part_number, slot->version,
+			slot->priority);
+	} else {
+		dev_err(dev, "loader: failed to load slot-%d %s:%s (prio %d)\n",
+			slot->slotno, slot->part_number, slot->version,
+			slot->priority);
+
+		/* if it's a override slot remove it */
+		if (slot->override)
+			capemgr_remove_slot(slot);
+	}
+
+	return ret;
+}
+
+static int
+capemgr_probe(struct platform_device *pdev)
+{
+	struct capemgr_info *info;
+	struct bone_baseboard *bbrd;
+	struct bone_cape_slot *slot;
+	struct device_node *pnode = pdev->dev.of_node;
+	struct device_node *baseboardmaps_node;
+	struct device_node *node;
+	const char *part_number;
+	const char *version;
+	const char *board_name;
+	const char *compatible_name;
+	char slot_name[16];
+	u32 slots_nr;
+	int i, ret, len, prio;
+	long val;
+	char *wbuf, *s, *p, *e;
+
+	if (uboot_capemgr_enabled)
+		return 0;
+
+	/* we don't use platform_data at all; we require OF */
+	if (pnode == NULL)
+		return -ENOTSUPP;
+
+	info = devm_kzalloc(&pdev->dev,
+			sizeof(struct capemgr_info), GFP_KERNEL);
+	if (!info)
+		return -ENOMEM;
+
+	info->pdev = pdev;
+	platform_set_drvdata(pdev, info);
+
+	atomic_set(&info->next_slot_nr, 0);
+	INIT_LIST_HEAD(&info->slot_list);
+	mutex_init(&info->slots_list_mutex);
+
+	init_waitqueue_head(&info->load_wq);
+
+	baseboardmaps_node = NULL;
+
+	/* find the baseboard */
+	bbrd = &info->baseboard;
+
+	baseboardmaps_node = of_get_child_by_name(pnode, "baseboardmaps");
+	if (baseboardmaps_node == NULL) {
+		dev_err(&pdev->dev, "Failed to get baseboardmaps node");
+		ret = -ENODEV;
+		goto err_exit;
+	}
+
+	bbrd->nvmem_cell = nvmem_cell_get(&pdev->dev, "baseboard");
+	if (IS_ERR(bbrd->nvmem_cell)) {
+		ret = PTR_ERR(bbrd->nvmem_cell);
+		if (ret != -EPROBE_DEFER)
+			dev_err(&pdev->dev, "Failed to get baseboard eeprom cell\n");
+		bbrd->nvmem_cell = NULL;
+		goto err_exit;
+	}
+
+	ret = bone_baseboard_scan(bbrd);
+	if (ret != 0) {
+		dev_err(&pdev->dev, "Failed to scan baseboard eeprom\n");
+		goto err_exit;
+	}
+
+	dev_info(&pdev->dev, "Baseboard: '%s'\n", bbrd->text_id);
+
+	board_name = NULL;
+	compatible_name = NULL;
+	for_each_child_of_node(baseboardmaps_node, node) {
+		/* there must be board-name */
+		if (of_property_read_string(node, "board-name",
+					&board_name) != 0 ||
+		    of_property_read_string(node, "compatible-name",
+					&compatible_name) != 0)
+			continue;
+
+		if (strcmp(bbrd->board_name, board_name) == 0)
+			break;
+	}
+	of_node_put(baseboardmaps_node);
+	baseboardmaps_node = NULL;
+
+	if (node == NULL) {
+		dev_err(&pdev->dev, "Failed to find compatible map for %s\n",
+				bbrd->board_name);
+		ret = -ENODEV;
+		goto err_exit;
+	}
+	bbrd->compatible_name = kstrdup(compatible_name, GFP_KERNEL);
+	if (bbrd->compatible_name == NULL) {
+		ret = -ENOMEM;
+		goto err_exit;
+	}
+	of_node_put(node);
+
+	/* get slot number */
+	ret = of_property_read_u32(pnode, "#slots", &slots_nr);
+	if (ret != 0)
+		slots_nr = 0;
+
+	dev_info(&pdev->dev, "compatible-baseboard=%s - #slots=%d\n",
+			bbrd->compatible_name, slots_nr);
+
+	for (i = 0; i < slots_nr; i++) {
+		snprintf(slot_name, sizeof(slot_name), "slot%d", i);
+		slot = capemgr_add_slot(info, slot_name, NULL, NULL, 0);
+		if (IS_ERR(slot)) {
+			dev_err(&pdev->dev, "Failed to add slot #%d\n",
+				atomic_read(&info->next_slot_nr));
+			ret = PTR_ERR(slot);
+			goto err_exit;
+		}
+	}
+
+	/* iterate over enable_partno (if there) */
+	if (enable_partno && strlen(enable_partno) > 0) {
+
+		/* allocate a temporary buffer */
+		wbuf = devm_kzalloc(&pdev->dev, PAGE_SIZE, GFP_KERNEL);
+		if (wbuf == NULL) {
+			ret = -ENOMEM;
+			goto err_exit;
+		}
+
+		/* add any enable_partno capes */
+		s = enable_partno;
+		while (*s) {
+			/* form is PART[:REV[:PRIO]],PART.. */
+			p = strchr(s, ',');
+			if (p == NULL)
+				e = s + strlen(s);
+			else
+				e = p;
+
+			/* copy to temp buffer */
+			len = e - s;
+			if (len >= PAGE_SIZE - 1)
+				len = PAGE_SIZE - 1;
+			memcpy(wbuf, s, len);
+			wbuf[len] = '\0';
+
+			/* move to the next */
+			s = *e ? e + 1 : e;
+
+			part_number = wbuf;
+
+			/* default version is NULL & prio is 0 */
+			version = NULL;
+			prio = 0;
+
+			/* now split the rev & prio part */
+			p = strchr(wbuf, ':');
+			if (p != NULL) {
+				*p++ = '\0';
+				if (*p != ':')
+					version = p;
+				p = strchr(p, ':');
+				if (p != NULL) {
+					*p++ = '\0';
+					ret = kstrtol(p, 10, &val);
+					if (ret == 0)
+						prio = val;
+				}
+			}
+
+			dev_info(&pdev->dev,
+				"enabled_partno PARTNO '%s' VER '%s' PR '%d'\n",
+					part_number,
+					version ? version : "N/A", prio);
+
+			/* only immediate slots are allowed here */
+			slot = capemgr_add_slot(info, NULL,
+					part_number, version, prio);
+
+			/* we continue even in case of an error */
+			if (IS_ERR_OR_NULL(slot)) {
+				dev_warn(&pdev->dev, "Failed to add slot #%d\n",
+					atomic_read(&info->next_slot_nr) - 1);
+			}
+		}
+
+		devm_kfree(&pdev->dev, wbuf);
+	}
+
+	pm_runtime_enable(&pdev->dev);
+	ret = pm_runtime_get_sync(&pdev->dev);
+	if (IS_ERR_VALUE(ret)) {
+		dev_err(&pdev->dev, "Failed to pm_runtime_get_sync()\n");
+		goto err_exit;
+	}
+
+	pm_runtime_put(&pdev->dev);
+
+	/* it is safe to create the attribute groups */
+	ret = sysfs_create_groups(&pdev->dev.kobj, attr_groups);
+	if (ret != 0) {
+		dev_err(&pdev->dev, "Failed to create sysfs attributes\n");
+		goto err_exit;
+	}
+	/* automatically cleared by driver core now */
+	pdev->dev.groups = attr_groups;
+
+	/* now load each (take lock to be sure */
+	mutex_lock(&info->slots_list_mutex);
+
+	list_for_each_entry(slot, &info->slot_list, node) {
+
+		/* if matches the disabled ones skip */
+		if (bone_match_cape(disable_partno, slot->part_number,
+					slot->version)) {
+			dev_info(&pdev->dev,
+				"Skipping loading of disabled cape with part# %s\n",
+				slot->part_number);
+			slot->disabled = 1;
+			continue;
+		}
+
+		if (!slot->probe_failed && !slot->loaded)
+			slot->loading = 1;
+	}
+
+	/* now start the loader thread(s) (all at once) */
+	list_for_each_entry(slot, &info->slot_list, node) {
+
+		if (!slot->loading)
+			continue;
+
+		slot->loader_thread = kthread_run(capemgr_loader,
+				slot, "capemgr-loader-%d",
+				slot->slotno);
+		if (IS_ERR(slot->loader_thread)) {
+			dev_warn(&pdev->dev, "slot #%d: Failed to start loader\n",
+					slot->slotno);
+			slot->loader_thread = NULL;
+		}
+	}
+	mutex_unlock(&info->slots_list_mutex);
+
+	dev_info(&pdev->dev, "initialized OK.\n");
+
+	return 0;
+
+err_exit:
+	if (bbrd->nvmem_cell)
+		nvmem_cell_put(bbrd->nvmem_cell);
+	of_node_put(baseboardmaps_node);
+	platform_set_drvdata(pdev, NULL);
+	devm_kfree(&pdev->dev, info);
+
+	return ret;
+}
+
+static int capemgr_remove(struct platform_device *pdev)
+{
+	struct capemgr_info *info = platform_get_drvdata(pdev);
+	struct bone_baseboard *bbrd = &info->baseboard;
+	struct bone_cape_slot *slot, *slotn;
+	int ret;
+
+	mutex_lock(&info->slots_list_mutex);
+	list_for_each_entry_safe(slot, slotn, &info->slot_list, node)
+		capemgr_remove_slot_no_lock(slot);
+	mutex_unlock(&info->slots_list_mutex);
+
+	platform_set_drvdata(pdev, NULL);
+
+	ret = pm_runtime_get_sync(&pdev->dev);
+	if (IS_ERR_VALUE(ret))
+		return ret;
+
+	pm_runtime_put(&pdev->dev);
+	pm_runtime_disable(&pdev->dev);
+
+	if (bbrd->nvmem_cell)
+		nvmem_cell_put(bbrd->nvmem_cell);
+	devm_kfree(&pdev->dev, info);
+
+	return 0;
+}
+
+static struct platform_driver capemgr_driver = {
+	.probe		= capemgr_probe,
+	.remove		= capemgr_remove,
+	.driver		= {
+		.name	= "bone_capemgr",
+		.owner	= THIS_MODULE,
+		.of_match_table = of_match_ptr(capemgr_of_match),
+	},
+};
+
+module_platform_driver(capemgr_driver);
+
+MODULE_AUTHOR("Pantelis Antoniou");
+MODULE_DESCRIPTION("Beaglebone cape manager");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:bone_capemgr");
diff --git b/drivers/misc/cape/Kconfig b/drivers/misc/cape/Kconfig
new file mode 100644
index 0000000..a2ef85e
--- /dev/null
+++ b/drivers/misc/cape/Kconfig
@@ -0,0 +1,5 @@
+#
+# Capes
+#
+
+source "drivers/misc/cape/beaglebone/Kconfig"
diff --git b/drivers/misc/cape/Makefile b/drivers/misc/cape/Makefile
new file mode 100644
index 0000000..7c4eb96
--- /dev/null
+++ b/drivers/misc/cape/Makefile
@@ -0,0 +1,5 @@
+#
+# Makefile for cape like devices
+#
+
+obj-y				+= beaglebone/
diff --git b/drivers/misc/cape/beaglebone/Kconfig b/drivers/misc/cape/beaglebone/Kconfig
new file mode 100644
index 0000000..eeb6782
--- /dev/null
+++ b/drivers/misc/cape/beaglebone/Kconfig
@@ -0,0 +1,10 @@
+#
+# Beaglebone capes
+#
+
+config BEAGLEBONE_PINMUX_HELPER
+	tristate "Beaglebone Pinmux Helper"
+	depends on ARCH_OMAP2PLUS && OF
+	default n
+	help
+	  Say Y here to include support for the pinmux helper
diff --git b/drivers/misc/cape/beaglebone/Makefile b/drivers/misc/cape/beaglebone/Makefile
new file mode 100644
index 0000000..7f4617a
--- /dev/null
+++ b/drivers/misc/cape/beaglebone/Makefile
@@ -0,0 +1,5 @@
+#
+# Makefile for beaglebone capes
+#
+
+obj-$(CONFIG_BEAGLEBONE_PINMUX_HELPER)	+= bone-pinmux-helper.o
diff --git b/drivers/misc/cape/beaglebone/bone-pinmux-helper.c b/drivers/misc/cape/beaglebone/bone-pinmux-helper.c
new file mode 100644
index 0000000..d81363a
--- /dev/null
+++ b/drivers/misc/cape/beaglebone/bone-pinmux-helper.c
@@ -0,0 +1,242 @@
+/*
+ * Pinmux helper driver
+ *
+ * Copyright (C) 2013 Pantelis Antoniou <panto@antoniou-consulting.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/err.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/of_gpio.h>
+#include <linux/slab.h>
+#include <linux/pinctrl/pinctrl.h>
+#include <linux/pinctrl/pinmux.h>
+#include <linux/pinctrl/consumer.h>
+
+static const struct of_device_id bone_pinmux_helper_of_match[] = {
+	{
+		.compatible = "bone-pinmux-helper",
+	},
+	{ },
+};
+MODULE_DEVICE_TABLE(of, bone_pinmux_helper_of_match);
+
+struct pinmux_helper_data {
+	struct pinctrl *pinctrl;
+	char *selected_state_name;
+};
+
+static ssize_t pinmux_helper_show_state(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct pinmux_helper_data *data = platform_get_drvdata(pdev);
+	const char *name;
+
+	name = data->selected_state_name;
+	if (name == NULL || strlen(name) == 0)
+		name = "none";
+	return sprintf(buf, "%s\n", name);
+}
+
+static ssize_t pinmux_helper_store_state(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct pinmux_helper_data *data = platform_get_drvdata(pdev);
+	struct pinctrl_state *state;
+	char *state_name;
+	char *s;
+	int err;
+
+	/* duplicate (as a null terminated string) */
+	state_name = kmalloc(count + 1, GFP_KERNEL);
+	if (state_name == NULL)
+		return -ENOMEM;
+	memcpy(state_name, buf, count);
+	state_name[count] = '\0';
+
+	/* and chop off newline */
+	s = strchr(state_name, '\n');
+	if (s != NULL)
+		*s = '\0';
+
+	/* try to select default state at first (if it exists) */
+	state = pinctrl_lookup_state(data->pinctrl, state_name);
+	if (!IS_ERR(state)) {
+		err = pinctrl_select_state(data->pinctrl, state);
+		if (err != 0)
+			dev_err(dev, "Failed to select state %s\n",
+					state_name);
+	} else {
+		dev_err(dev, "Failed to find state %s\n", state_name);
+		err = PTR_RET(state);
+	}
+
+	if (err == 0) {
+		kfree(data->selected_state_name);
+		data->selected_state_name = state_name;
+	}
+
+	return err ? err : count;
+}
+
+static DEVICE_ATTR(state, S_IWUSR | S_IRUGO,
+		   pinmux_helper_show_state, pinmux_helper_store_state);
+
+static struct attribute *pinmux_helper_attributes[] = {
+	&dev_attr_state.attr,
+	NULL
+};
+
+static const struct attribute_group pinmux_helper_attr_group = {
+	.attrs = pinmux_helper_attributes,
+};
+
+static int bone_pinmux_helper_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct pinmux_helper_data *data;
+	struct pinctrl_state *state;
+	char *state_name;
+	const char *mode_name;
+	int mode_len;
+	int err;
+
+	data = devm_kzalloc(&pdev->dev, sizeof(*data), GFP_KERNEL);
+	if (data == NULL) {
+		dev_err(dev, "Failed to allocate data\n");
+		err = -ENOMEM;
+		goto err_no_mem;
+	}
+
+	state_name = kmalloc(strlen(PINCTRL_STATE_DEFAULT) + 1,
+			GFP_KERNEL);
+	if (state_name == NULL) {
+		dev_err(dev, "Failed to allocate state name\n");
+		err = -ENOMEM;
+		goto err_no_state_mem;
+	}
+	data->selected_state_name = state_name;
+	strcpy(data->selected_state_name, PINCTRL_STATE_DEFAULT);
+
+	platform_set_drvdata(pdev, data);
+
+	data->pinctrl = devm_pinctrl_get(dev);
+	if (IS_ERR(data->pinctrl)) {
+		dev_err(dev, "Failed to get pinctrl\n");
+		err = PTR_RET(data->pinctrl);
+		goto err_no_pinctrl;
+	}
+
+	/* See if an initial mode is specified in the device tree */
+	mode_name = of_get_property(dev->of_node, "mode", &mode_len);
+
+	err = -1;
+	if (mode_name != NULL ) {
+		state_name = kmalloc(mode_len + 1, GFP_KERNEL);
+		if (state_name == NULL) {
+			dev_err(dev, "Failed to allocate state name\n");
+			err = -ENOMEM;
+			goto err_no_mode_mem;
+		}
+		strncpy(state_name, mode_name, mode_len);
+
+		/* try to select requested mode */
+		state = pinctrl_lookup_state(data->pinctrl, state_name);
+		if (!IS_ERR(state)) {
+			err = pinctrl_select_state(data->pinctrl, state);
+			if (err != 0) {
+				dev_warn(dev, "Unable to select requested mode %s\n", state_name);
+				kfree(state_name);
+			} else {
+				kfree(data->selected_state_name);
+				data->selected_state_name = state_name;
+				dev_notice(dev, "Set initial pinmux mode to %s\n", state_name);
+			}
+		}
+	}
+
+	/* try to select default state if mode_name failed */
+	if ( err != 0) {
+		state = pinctrl_lookup_state(data->pinctrl,
+				data->selected_state_name);
+		if (!IS_ERR(state)) {
+			err = pinctrl_select_state(data->pinctrl, state);
+			if (err != 0) {
+				dev_err(dev, "Failed to select default state\n");
+				goto err_no_state;
+			}
+		} else {
+			data->selected_state_name = '\0';
+		}
+	}
+
+	/* Register sysfs hooks */
+	err = sysfs_create_group(&dev->kobj, &pinmux_helper_attr_group);
+	if (err) {
+		dev_err(dev, "Failed to create sysfs group\n");
+		goto err_no_sysfs;
+	}
+
+	return 0;
+
+err_no_sysfs:
+err_no_state:
+err_no_mode_mem:
+	devm_pinctrl_put(data->pinctrl);
+err_no_pinctrl:
+	devm_kfree(dev, data->selected_state_name);
+err_no_state_mem:
+	devm_kfree(dev, data);
+err_no_mem:
+	return err;
+}
+
+static int bone_pinmux_helper_remove(struct platform_device *pdev)
+{
+	struct pinmux_helper_data *data = platform_get_drvdata(pdev);
+	struct device *dev = &pdev->dev;
+
+	sysfs_remove_group(&dev->kobj, &pinmux_helper_attr_group);
+	kfree(data->selected_state_name);
+	devm_pinctrl_put(data->pinctrl);
+	devm_kfree(dev, data);
+
+	return 0;
+}
+
+struct platform_driver bone_pinmux_helper_driver = {
+	.probe		= bone_pinmux_helper_probe,
+	.remove		= bone_pinmux_helper_remove,
+	.driver = {
+		.name		= "bone-pinmux-helper",
+		.owner		= THIS_MODULE,
+		.of_match_table	= bone_pinmux_helper_of_match,
+	},
+};
+
+module_platform_driver(bone_pinmux_helper_driver);
+
+MODULE_AUTHOR("Pantelis Antoniou");
+MODULE_DESCRIPTION("Beaglebone pinmux helper driver");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:bone-pinmux-helper");
diff --git b/drivers/misc/cape_bone_argus/Kconfig b/drivers/misc/cape_bone_argus/Kconfig
new file mode 100644
index 0000000..1b39661
--- /dev/null
+++ b/drivers/misc/cape_bone_argus/Kconfig
@@ -0,0 +1,7 @@
+comment "Argus cape driver for beaglebone black"
+
+config CAPE_BONE_ARGUS
+	tristate "Argus Cape Driver"
+	default M
+	help
+	    Argus Cape Driver
diff --git b/drivers/misc/cape_bone_argus/Makefile b/drivers/misc/cape_bone_argus/Makefile
new file mode 100644
index 0000000..5482562
--- /dev/null
+++ b/drivers/misc/cape_bone_argus/Makefile
@@ -0,0 +1,5 @@
+#
+# Makefile for Argus cape
+#
+
+obj-$(CONFIG_CAPE_BONE_ARGUS)	+= cape_bone_argus.o
diff --git b/drivers/misc/cape_bone_argus/cape_bone_argus.c b/drivers/misc/cape_bone_argus/cape_bone_argus.c
new file mode 100644
index 0000000..c434218
--- /dev/null
+++ b/drivers/misc/cape_bone_argus/cape_bone_argus.c
@@ -0,0 +1,415 @@
+/* -*- linux-c -*- */
+
+/* Linux Kernel Module for Breakaway Systems UPS control.
+ *
+ * PUBLIC DOMAIN
+ */
+
+#include <linux/syscalls.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/reboot.h>
+#include <linux/fs.h>
+#include <linux/uaccess.h>
+#include <linux/delay.h>
+#include <linux/gpio.h>
+#include <linux/mount.h>
+#include <linux/workqueue.h>
+#include <linux/cdev.h>
+#include <linux/platform_device.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/pinctrl/pinctrl.h>
+#include <linux/pinctrl/pinmux.h>
+#include <linux/pinctrl/consumer.h>
+#include <linux/of_gpio.h>
+
+/* Module to sync file systems leaving them mounted read-only,
+ * then signal the UPS that it is safe to remove
+ * power, and finally halt the processor.
+ * Also to allow kicking the watchdog from user mode.
+ */
+#undef DEBUG_ARGUS
+
+#define N_GPIOS 9		/* Total number of GPIOS used */
+
+#define REQ_GPIO_IDX 0		/* Indices got GPIOS */
+#define ACK_GPIO_IDX 1
+#define WDG_GPIO_IDX 2
+#define LED1_GREEN_IDX 3
+#define LED1_RED_IDX 4
+#define LED2_GREEN_IDX 5
+#define LED2_RED_IDX 6
+#define GEN_OUT1_IDX 7
+#define GEN_OUT2_IDX 8
+
+static struct argus_ups_info {	/* As there is only one UPS device we can make this static */
+	struct fasync_struct *async_queue; /* asynchronous readers */
+	struct platform_device *pdev;
+	struct pwm_device *pwm_dev;
+	struct gpio gpios[N_GPIOS];
+} info = {NULL, NULL, NULL, /* Some fields filled in by device tree, probe, etc. */
+     {
+	     {-1, GPIOF_IN, "Powerdown request"},
+	     {-1, GPIOF_OUT_INIT_LOW, "Powerdown acknowledge" },
+	     {-1, GPIOF_OUT_INIT_LOW, "Watchdog"},
+	     {-1, GPIOF_OUT_INIT_LOW, "LED 1 Green"},
+	     {-1, GPIOF_OUT_INIT_LOW, "LED 1 Red"},
+	     {-1, GPIOF_OUT_INIT_LOW, "LED 2 Green"},
+	     {-1, GPIOF_OUT_INIT_LOW, "LED 2 Red"},
+	     {-1, GPIOF_OUT_INIT_LOW, "General Output #1"},
+	     {-1, GPIOF_OUT_INIT_LOW, "General Output #2"}
+     },
+};
+
+
+static const struct of_device_id argus_ups_of_ids[] = {
+	{ .compatible = "argus-ups" },
+	{ }
+};
+
+static int argus_ups_major;     /* Major device number */
+
+static struct class *argus_ups_class; /* /sys/class */
+
+dev_t argus_ups_dev;            /* Device number */
+
+static struct cdev *argus_ups_cdev; /* Character device details */
+
+static void argus_ups_function(struct work_struct *ignored); /* Work function */
+
+static DECLARE_DELAYED_WORK(argus_ups_work, argus_ups_function); /* Kernel workqueue glue */
+
+static struct workqueue_struct *argus_ups_workqueue; /* Kernel workqueue */
+
+static int debug = 0;
+module_param(debug, int, S_IRUGO);
+MODULE_PARM_DESC(debug, "Debug flag");
+
+static int shutdown = 1;
+module_param(shutdown, int, S_IRUGO);
+MODULE_PARM_DESC(shutdown, "Shutdown flag");
+
+#ifdef DEBUG_ARGUS
+static char* fs_type_names[] = {"vfat", "ext4"}; /* File system names that may need syncing. */
+#endif
+
+/* Just kick watchdog */
+
+static ssize_t argus_ups_write(struct file *filp, const char __user *buf, size_t count,
+                loff_t *f_pos)
+{
+	int i;
+        if (debug >= 3) {
+            printk("Writing to watchdog - count:%d\n", count);
+        }
+	for (i = 0; i < count; i++) {
+		gpio_set_value(info.gpios[WDG_GPIO_IDX].gpio, 1); /* Set it */
+		msleep(10);                       /* Wait */
+		gpio_set_value(info.gpios[WDG_GPIO_IDX].gpio, 0); /* End clearing it */
+		msleep(10);
+	}
+	return count;                     /* Always returns what we sent, regardsless */
+}
+
+static long argus_ups_ioctl(struct file *file,
+			   unsigned int ioctl,
+			   unsigned long param)
+{
+	if (debug >= 4) {
+		printk(KERN_ERR "ioctl: %d, param: %ld\n", ioctl, param);
+	}
+	switch(ioctl) {
+	case 10001: {
+		debug = param;
+		printk("Debug set to %d\n", debug);
+		break;
+	}
+	case 10002: {
+		unsigned char value = param & 0x0F;
+		unsigned char mask = (param >> 4) & 0x0F;
+		int i;		/* Loop iterator */
+		if (mask == 0) {
+			printk(KERN_ERR "Pointless mask of zero!\n");
+		}
+		for (i = 0; i < 4; i++) { /* For all four LEDS */
+			if (mask & (1 << i)) { /* Only masked values */
+				if (value & (1 << i)) { /* On - so gpio is hi */
+					if (debug >= 4) {
+						printk("Setting %d hi, ",
+						       info.gpios[LED1_GREEN_IDX + i].gpio);
+					}
+					gpio_set_value(info.gpios[LED1_GREEN_IDX + i].gpio, 1);
+				}
+				else {	/* Off - so gpio is lo */
+					if (debug >= 4) {
+						printk("Setting %d lo, ",
+						       info.gpios[LED1_GREEN_IDX + i].gpio);
+					}
+					gpio_set_value(info.gpios[LED1_GREEN_IDX + i].gpio, 0);
+				}
+			}
+		}
+		if (debug >= 4) {
+			printk("\n");
+		}
+		break;
+	}
+	case 10003: {
+		gpio_set_value(info.gpios[GEN_OUT1_IDX].gpio, param & 1);
+		break;
+	}
+	case 10004: {
+		gpio_set_value(info.gpios[GEN_OUT2_IDX].gpio, param & 1);
+		break;
+	}
+	default:
+	{
+		printk(KERN_ERR "Invalid ioctl %d\n", ioctl);
+		return -1;
+	}
+	}
+	return 0;
+}
+
+static int argus_ups_fasync(int fd, struct file *filp, int mode)
+{
+	printk(KERN_ERR "In argus_ups_fasync() fd:%d, filp:%p, mode:%d\n", fd, filp, mode);
+	return fasync_helper(fd, filp, mode, &info.async_queue);
+}
+
+static struct file_operations argus_ups_fops = { /* Only file operation is to kick watchdog via a write */
+	.owner =    THIS_MODULE,
+	.llseek =   NULL,
+	.read =     NULL,
+	.unlocked_ioctl = argus_ups_ioctl,
+	.write =    argus_ups_write,
+	.open =     NULL,
+	.release =  NULL,
+	.fasync = argus_ups_fasync,
+};
+
+#ifdef DEBUG_ARGUS
+static void remount_sb(struct super_block *sb)
+{
+	int flags =  MS_RDONLY;
+	int result = sb->s_op->remount_fs(sb, &flags, "");
+	if (debug) {
+		printk("Processing superblock %p\n", sb);
+		printk("Remount operation returned %d\n", result);
+	}
+}
+#endif
+
+static void argus_ups_function(struct work_struct *ignored)
+{
+	static int testdata = 0;       /* Data for test */
+	int i;                      /* Iterator */
+	testdata++;
+	if (!gpio_get_value(info.gpios[REQ_GPIO_IDX].gpio)) {
+                queue_delayed_work(argus_ups_workqueue, &argus_ups_work, HZ/100); /* Re-queue in 10mS*/
+		return;
+        }
+	printk(KERN_ERR "Request received\n");
+	if (debug) {
+		printk("Shutdown request received from UPS\n");
+	}
+	if (!shutdown) {
+		printk("Shutdown request ignored\n");
+		return;
+	}
+
+	if (debug) {
+		printk("Sending async kill SIGIO to %p\n", info.async_queue);
+	}
+	if (info.async_queue) { /* Try and tell usermode to halt system */
+		kill_fasync(&info.async_queue, SIGIO, POLL_IN);
+	}
+	gpio_set_value(info.gpios[LED1_GREEN_IDX].gpio, 0); /* Turn off green LED1 */
+	for (i = 0; i < 300; i++) { /* Toggle acknowledge at 10 Hz for 15 seconds */
+		if (debug >= 2) {
+			printk("Waiting for first shutdown request:%d\n", i);
+		}
+		gpio_set_value(info.gpios[ACK_GPIO_IDX].gpio, i & 1); /* Toggle acknowledge */
+		gpio_set_value(info.gpios[LED1_RED_IDX].gpio, i & 1); /* and LED1 red */
+		msleep(50); /* Wait in 50ms increments */
+	}
+
+	{
+		char *argv[] = { "/sbin/halt", NULL };
+		static char *envp[] = {
+			"HOME=/",
+			"TERM=linux",
+			"PATH=/usr/local/bin:/usr/bin:/bin:/usr/local/sbin:/usr/sbin:/sbin", NULL };
+
+		call_usermodehelper( argv[0], argv, envp, UMH_WAIT_PROC );
+	}
+	for (i = 0; i < 300; i++) { /* Toggle acknowledge at 10 Hz for 15 more seconds */
+		if (debug >= 2) {
+			printk("Waiting for second shutdown request:%d\n", i);
+		}
+		gpio_set_value(info.gpios[ACK_GPIO_IDX].gpio, i & 1); /* Toggle acknowledge */
+		gpio_set_value(info.gpios[LED1_RED_IDX].gpio, i & 1); /* and LED1 red */
+		msleep(50); /* Wait in 50ms increments */
+	}
+	printk(KERN_ERR "Usermode failed to halt system\n");
+	kernel_halt();	       /* Last resort - may give some oopss */
+}
+
+
+static int argus_ups_probe(struct platform_device *pdev) /* Entry point */
+{
+	struct pinctrl *pinctrl;
+	struct device_node *pnode = pdev->dev.of_node;
+	int i;
+	int ret;
+        printk("Init UPS module - debug=%d, shutdown=%d\n",
+	       debug, shutdown);
+	platform_set_drvdata(pdev, &info);
+	info.pdev = pdev;
+	pinctrl = devm_pinctrl_get_select_default(&pdev->dev);
+	if (IS_ERR(pinctrl)) {
+		dev_warn(&pdev->dev,
+			"pins are not configured from the driver\n");
+		return -1;
+	}
+	ret = of_property_read_u32(pnode, "debug", &debug);
+	if (ret != 0) {
+		dev_err(&pdev->dev, "Unable to read debug parameter\n");
+	}
+	else {
+		printk("Debug parameter read from DT:%d\n", debug);
+	}
+
+	ret = of_property_read_u32(pnode, "shutdown", &shutdown);
+	if (ret != 0) {
+		dev_err(&pdev->dev, "Unable to read shutdown parameter\n");
+	}
+	else {
+		printk("Shutdown parameter read from DT:%d\n", shutdown);
+	}
+
+	ret = of_gpio_count(pnode);
+
+	if (ret != N_GPIOS) {
+		printk(KERN_ERR "Wrong number of gpios");
+		return -1;
+	}
+
+	for (i = 0; i < of_gpio_count(pnode); i++) {
+		ret = of_get_gpio_flags(pnode, i, NULL);
+		if (debug) {
+			printk("GPIO#%d:%d\n", i, ret);
+		}
+		if (IS_ERR_VALUE(ret)) {
+			dev_err(&pdev->dev, "unable to get GPIO %d\n", i);
+			goto err_no_gpio;
+		}
+		info.gpios[i].gpio = ret;
+	}
+
+
+        ret = alloc_chrdev_region(&argus_ups_dev, 0, 2, "argus_ups");
+        argus_ups_major = MAJOR(argus_ups_dev);
+        if (ret) {
+		printk(KERN_ERR "Error %d adding argus_ups\n", ret);
+		return -1;
+        }
+	if (debug) {
+		printk("argus_ups major: %d\n", argus_ups_major);
+	}
+        argus_ups_cdev = cdev_alloc(); /* Make this a character device */
+        argus_ups_cdev->ops = &argus_ups_fops; /* File operations */
+        argus_ups_cdev->owner = THIS_MODULE;   /* Top level device */
+        ret = cdev_add(argus_ups_cdev, argus_ups_dev, 1); /* Add it to the kernel */
+        if (ret) {
+		printk(KERN_ERR "cdev_add returned %d\n", ret);
+		unregister_chrdev_region(0, 1);
+		return -1;
+	}
+        ret = gpio_request_array(info.gpios, N_GPIOS);
+	if (ret) {
+		printk(KERN_ERR "Error %d requesting GPIOs\n", ret);
+		unregister_chrdev_region(0, 1);
+		return -1;
+        }
+
+        argus_ups_class = class_create(THIS_MODULE, "argus_ups"); /* /sys/class entry for udev */
+        if (IS_ERR(argus_ups_class)) {
+		printk(KERN_ERR "Error creating argus_ups_class\n");
+		unregister_chrdev_region(0, 1);
+		return -1;
+	}
+	device_create(argus_ups_class, NULL, MKDEV(argus_ups_major, 0), NULL, "argus_ups");
+        argus_ups_workqueue = create_singlethread_workqueue("argus_ups");
+        INIT_DELAYED_WORK(&argus_ups_work, argus_ups_function);
+        queue_delayed_work(argus_ups_workqueue, &argus_ups_work, 0); /* Start work immediately */
+
+        return 0;
+err_no_gpio:
+	return ret;
+
+}
+
+
+static void argus_ups_cleanup(void)
+{
+	printk("Module cleanup called\n");
+        while (cancel_delayed_work(&argus_ups_work) == 0) {
+		flush_workqueue(argus_ups_workqueue); /* Make sure all work is completed */
+	}
+	destroy_workqueue(argus_ups_workqueue);
+	gpio_free_array(info.gpios, N_GPIOS);
+	device_destroy(argus_ups_class, argus_ups_dev);
+	class_destroy(argus_ups_class);
+        unregister_chrdev_region(argus_ups_dev, 1);
+        cdev_del(argus_ups_cdev);
+}
+
+
+
+static int argus_ups_remove(struct platform_device *pdev)
+{
+	printk("In argus_ups_remove()\n");
+	argus_ups_cleanup();
+	printk("After cleanup\n");
+	return 0;
+}
+
+#define ARGUS_UPS_PM_OPS NULL
+
+struct platform_driver argus_ups_driver = {
+	.probe		= argus_ups_probe,
+	.remove		= argus_ups_remove,
+	.driver = {
+		.name		= "argus-ups",
+		.owner		= THIS_MODULE,
+		.pm		= ARGUS_UPS_PM_OPS,
+		.of_match_table = argus_ups_of_ids,
+	},
+};
+
+
+static int __init argus_ups_init(void)
+{
+	return platform_driver_probe(&argus_ups_driver,
+				     argus_ups_probe);
+}
+
+static void __exit argus_ups_exit(void)
+{
+	platform_driver_unregister(&argus_ups_driver);
+	printk("After driver unregister\n");
+}
+
+module_init(argus_ups_init);
+module_exit(argus_ups_exit);
+
+/*
+ * Get rid of taint message.
+ */
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("David Lambert");	/* Who wrote this module? */
+MODULE_DESCRIPTION("Argus UPS control"); /* What does this module do */
+MODULE_ALIAS("platform:argus-ups");
+MODULE_DEVICE_TABLE(of, argus_ups_of_ids);
diff --git b/drivers/misc/devovmgr.c b/drivers/misc/devovmgr.c
new file mode 100644
index 0000000..d5c8d1d
--- /dev/null
+++ b/drivers/misc/devovmgr.c
@@ -0,0 +1,1306 @@
+/*
+ * Device overlay manager
+ *
+ * Copyright (C) 2015 Konsulko Group
+ * Pantelis Antoniou <pantelis.antoniou@konsulko.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/ctype.h>
+#include <linux/cpu.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_fdt.h>
+#include <linux/spinlock.h>
+#include <linux/sizes.h>
+#include <linux/slab.h>
+#include <linux/proc_fs.h>
+#include <linux/configfs.h>
+#include <linux/types.h>
+#include <linux/stat.h>
+#include <linux/limits.h>
+#include <linux/file.h>
+#include <linux/vmalloc.h>
+#include <linux/firmware.h>
+#include <linux/pci.h>
+#include <linux/usb.h>
+#include <linux/mod_devicetable.h>
+#include <linux/workqueue.h>
+#include <linux/firmware.h>
+
+enum dovmgr_type {
+	ITEM_PCI,
+	ITEM_USB
+};
+
+struct dovmgr_item;
+
+struct dovmgr_dev_item {
+	struct dovmgr_item *item;
+	struct list_head node;
+	struct device *dev;
+	const struct firmware *fw;
+	struct device_node *overlay;
+	int overlay_id;
+	struct work_struct work;
+};
+
+struct dovmgr_item {
+	struct config_item item;
+	char *path;
+	bool enable;
+	char *overlay_name;
+	struct mutex dev_item_mutex;
+	struct list_head dev_item_list;
+	enum dovmgr_type type;
+	union {
+		struct pci_device_id pci;
+		struct usb_device_id usb;
+	};
+};
+
+struct config_group dovmgr_pci_group;
+struct config_group dovmgr_usb_group;
+
+static inline struct dovmgr_item *to_dovmgr_item(struct config_item *cfsitem)
+{
+	if (!cfsitem)
+		return NULL;
+
+	return container_of(cfsitem, struct dovmgr_item, item);
+}
+
+static int dovmgr_notifier_action(struct config_group *group,
+		unsigned long action, struct device *dev,
+		int (*do_match)(struct dovmgr_item *item, struct device *dev),
+		int (*do_action)(struct dovmgr_item *item, unsigned long action,
+			struct device *dev))
+{
+	struct config_item *cfsitem;
+	struct dovmgr_item *item;
+	int ret;
+
+	/* only handle device notifiers */
+	if (action != BUS_NOTIFY_ADD_DEVICE &&
+		action != BUS_NOTIFY_DEL_DEVICE &&
+		action != BUS_NOTIFY_REMOVED_DEVICE)
+		return 0;
+
+	ret = 0;
+	list_for_each_entry(cfsitem, &group->cg_children, ci_entry) {
+		item = to_dovmgr_item(cfsitem);
+		if (!item->enable || !(*do_match)(item, dev))
+			continue;
+
+		ret = (*do_action)(item, action, dev);
+		if (ret != 0)
+			break;
+	}
+	return ret;
+}
+
+#if IS_ENABLED(CONFIG_PCI)
+/* copy of drivers/pci/pci.h */
+static inline const struct pci_device_id *
+pci_match_one_device(const struct pci_device_id *id, const struct pci_dev *dev)
+{
+	if ((id->vendor == PCI_ANY_ID || id->vendor == dev->vendor) &&
+	    (id->device == PCI_ANY_ID || id->device == dev->device) &&
+	    (id->subvendor == PCI_ANY_ID ||
+		id->subvendor == dev->subsystem_vendor) &&
+	    (id->subdevice == PCI_ANY_ID ||
+		id->subdevice == dev->subsystem_device) &&
+	    !((id->class ^ dev->class) & id->class_mask))
+		return id;
+	return NULL;
+}
+
+static int dovmgr_pci_item_match(struct dovmgr_item *item, struct device *dev)
+{
+	struct pci_dev *pdev;
+
+	BUG_ON(item->type != ITEM_PCI);
+	pdev = to_pci_dev(dev);
+
+	return pci_match_one_device(&item->pci, pdev) != NULL;
+}
+#endif
+
+#if IS_ENABLED(CONFIG_USB)
+/* in drivers/usb/core/driver.c */
+extern int usb_match_device(struct usb_device *dev,
+		const struct usb_device_id *id);
+
+static int dovmgr_usb_item_match(struct dovmgr_item *item, struct device *dev)
+{
+	struct usb_device *udev;
+
+	BUG_ON(item->type != ITEM_USB);
+	udev = to_usb_device(dev);
+
+	return usb_match_device(udev, &item->usb);
+}
+#endif
+
+static struct dovmgr_dev_item *dovmgr_lookup_dev_item(struct dovmgr_item *item,
+		struct device *dev)
+{
+	struct dovmgr_dev_item *ditem;
+
+	list_for_each_entry(ditem, &item->dev_item_list, node)
+		if (ditem->dev == dev)
+			return ditem;
+	return NULL;
+}
+
+static void dovmgr_item_work_func(struct work_struct *work)
+{
+	struct dovmgr_dev_item *ditem = container_of(work,
+			struct dovmgr_dev_item, work);
+	struct dovmgr_item *item = ditem->item;
+	struct device *dev;
+	struct device_node *np;
+	int err;
+
+	mutex_lock(&item->dev_item_mutex);
+
+	dev = ditem->dev;
+	np = dev->of_node;
+	if (!dev || !np || !item->overlay_name || ditem->overlay_id >= 0)
+		goto out_unlock;
+
+	pr_info("%s: %s %s\n", __func__,
+		kobject_name(&dev->kobj), of_node_full_name(np));
+
+	err = request_firmware_direct(&ditem->fw, item->overlay_name, dev);
+	if (err != 0) {
+		pr_err("%s: %s failed to load firmware '%s'\n", __func__,
+			kobject_name(&dev->kobj), item->overlay_name);
+		goto out_unlock;
+	}
+
+	of_fdt_unflatten_tree((void *)ditem->fw->data, &ditem->overlay);
+	if (ditem->overlay == NULL) {
+		pr_err("%s: %s failed to load firmware '%s'\n", __func__,
+			kobject_name(&dev->kobj), item->overlay_name);
+		goto out_release_fw;
+	}
+
+	/* mark it as detached */
+	of_node_set_flag(ditem->overlay, OF_DETACHED);
+
+	/* perform resolution */
+	err = of_resolve_phandles(ditem->overlay);
+	if (err != 0) {
+		pr_err("%s: %s failed to resolve tree\n", __func__,
+			kobject_name(&dev->kobj));
+		goto out_release_overlay;
+	}
+
+	err = of_overlay_create_target_root(ditem->overlay, np);
+	if (err < 0) {
+		pr_err("%s: %s failed to create overlay\n", __func__,
+			kobject_name(&dev->kobj));
+		goto out_release_overlay;
+	}
+	ditem->overlay_id = err;
+
+out_unlock:
+	mutex_unlock(&item->dev_item_mutex);
+	return;
+
+out_release_overlay:
+	/* TODO: free the overlay, we can't right now cause
+	 * the unflatten method does not track it */
+	ditem->overlay = NULL;
+out_release_fw:
+	release_firmware(ditem->fw);
+	ditem->fw = NULL;
+	goto out_unlock;
+}
+
+/* dev item list mutex lock must be held */
+static int dovmgr_add_dev_item(struct dovmgr_item *item, struct device *dev)
+{
+	struct dovmgr_dev_item *ditem;
+
+	/* first make sure there's no duplicate */
+	if (dovmgr_lookup_dev_item(item, dev))
+		return -EEXIST;
+
+	/* add the device item */
+	ditem = kzalloc(sizeof(*ditem), GFP_KERNEL);
+	if (!ditem)
+		return -ENOMEM;
+	ditem->overlay_id = -1;
+	ditem->dev = get_device(dev);
+	INIT_WORK(&ditem->work, dovmgr_item_work_func);
+	ditem->item = item;
+
+	list_add_tail(&ditem->node, &item->dev_item_list);
+
+	pr_info("%s: added device %s from item's %s list\n", __func__,
+			kobject_name(&dev->kobj),
+			config_item_name(&item->item));
+
+	/* now schedule the overlay application */
+	if (item->overlay_name)
+		schedule_work(&ditem->work);
+
+	return 0;
+}
+
+static int dovmgr_remove_dev_item(struct dovmgr_item *item, struct device *dev)
+{
+	struct dovmgr_dev_item *ditem;
+
+	/* find it */
+	ditem = dovmgr_lookup_dev_item(item, dev);
+	if (!ditem)
+		return -ENODEV;
+
+	if (work_pending(&ditem->work))
+		cancel_work_sync(&ditem->work);
+
+	if (ditem->overlay_id >= 0) {
+		of_overlay_destroy(ditem->overlay_id);
+		ditem->overlay_id = -1;
+
+	}
+
+	if (ditem->overlay) {
+		/* TODO: free the overlay, we can't right now cause
+		* the unflatten method does not track it */
+		ditem->overlay = NULL;
+	}
+
+	if (ditem->fw) {
+		/* TODO release_firmware(ditem->fw); */
+		release_firmware(ditem->fw);
+		ditem->fw = NULL;
+	}
+
+	put_device(ditem->dev);
+	list_del(&ditem->node);
+
+	kfree(ditem);
+
+	pr_info("%s: removed device %s from item's %s list\n", __func__,
+			kobject_name(&dev->kobj),
+			config_item_name(&item->item));
+
+	return 0;
+}
+
+static int dovmgr_item_notify(struct dovmgr_item *item,
+		unsigned long action, struct device *dev)
+{
+	int ret;
+
+	ret = 0;
+	mutex_lock(&item->dev_item_mutex);
+
+	switch (action) {
+	case BUS_NOTIFY_ADD_DEVICE:
+		pr_info("%s: BUS_NOTIFY_ADD_DEVICE for %s\n", __func__,
+				kobject_name(&dev->kobj));
+
+		ret = dovmgr_add_dev_item(item, dev);
+		if (ret != 0)
+			goto out_unlock;
+
+		break;
+
+	case BUS_NOTIFY_DEL_DEVICE:
+		pr_info("%s: BUS_NOTIFY_DEL_DEVICE for %s\n", __func__,
+				kobject_name(&dev->kobj));
+		break;
+
+	case BUS_NOTIFY_REMOVED_DEVICE:
+		pr_info("%s: BUS_NOTIFY_REMOVE_DEVICE for %s\n", __func__,
+				kobject_name(&dev->kobj));
+
+		ret = dovmgr_remove_dev_item(item, dev);
+		if (ret != 0)
+			goto out_unlock;
+
+		break;
+	}
+
+out_unlock:
+	mutex_unlock(&item->dev_item_mutex);
+
+	return ret;
+}
+
+#if IS_ENABLED(CONFIG_PCI)
+static int dovmgr_pci_add_iterator(struct device *dev, void *data)
+{
+	struct dovmgr_item *item = data;
+
+	/* do add match */
+	if (!item->enable || !dovmgr_pci_item_match(item, dev))
+		return 0;
+
+	pr_info("%s: dev=%s\n", __func__, kobject_name(&dev->kobj));
+
+	return dovmgr_item_notify(item, BUS_NOTIFY_ADD_DEVICE, dev);
+}
+
+static int dovmgr_pci_removed_iterator(struct device *dev, void *data)
+{
+	struct dovmgr_item *item = data;
+
+	/* do add match */
+	if (item->enable || !dovmgr_pci_item_match(item, dev))
+		return 0;
+
+	pr_info("%s: dev=%s\n", __func__, kobject_name(&dev->kobj));
+
+	return dovmgr_item_notify(item, BUS_NOTIFY_REMOVED_DEVICE, dev);
+}
+#endif
+
+#if IS_ENABLED(CONFIG_USB)
+static int dovmgr_usb_add_iterator(struct device *dev, void *data)
+{
+	struct dovmgr_item *item = data;
+
+	/* do add match */
+	if (item->enable || !dovmgr_usb_item_match(item, dev))
+		return 0;
+
+	pr_info("%s: dev=%s\n", __func__, kobject_name(&dev->kobj));
+
+	return dovmgr_item_notify(item, BUS_NOTIFY_ADD_DEVICE, dev);
+}
+
+static int dovmgr_usb_removed_iterator(struct device *dev, void *data)
+{
+	struct dovmgr_item *item = data;
+
+	/* do add match */
+	if (!item->enable || !dovmgr_usb_item_match(item, dev))
+		return 0;
+
+	pr_info("%s: dev=%s\n", __func__, kobject_name(&dev->kobj));
+
+	return dovmgr_item_notify(item, BUS_NOTIFY_REMOVED_DEVICE, dev);
+}
+#endif
+
+static int dovmgr_item_set_enable(struct dovmgr_item *item, bool new_enable)
+{
+	int ret;
+
+	if (new_enable == item->enable)
+		return 0;
+
+	item->enable = new_enable;
+	switch (item->type) {
+#if IS_ENABLED(CONFIG_PCI)
+	case ITEM_PCI:
+		ret = bus_for_each_dev(&pci_bus_type, NULL, item,
+			new_enable ? dovmgr_pci_add_iterator :
+					dovmgr_pci_removed_iterator);
+		if (ret != 0)
+			return ret;
+		break;
+#endif
+#if IS_ENABLED(CONFIG_USB)
+	case ITEM_USB:
+		ret = bus_for_each_dev(&usb_bus_type, NULL, item,
+			new_enable ? dovmgr_usb_add_iterator :
+					dovmgr_usb_removed_iterator);
+		if (ret != 0)
+			return ret;
+		break;
+#endif
+	default:
+		break;
+	}
+	return 0;
+}
+
+
+static ssize_t dovmgr_item_str_show(struct dovmgr_item *item,
+		char *page, char **strp)
+{
+	return snprintf(page, PAGE_SIZE, "%s\n",
+			*strp ? *strp : "");
+}
+
+static ssize_t dovmgr_item_str_store(struct dovmgr_item *item,
+		const char *page, size_t count, char **strp)
+{
+	const char *s;
+	int len;
+
+	/* copy to path buffer (and make sure it's always zero terminated */
+	len = strnlen(page, PAGE_SIZE);
+	if (len >= PAGE_SIZE)
+		return -EINVAL;
+	s = page + len;
+	while (len > 0 && *--s == '\n')
+		len--;
+	if (len == 0)
+		return -EINVAL;
+
+	if (*strp)
+		kfree(*strp);
+	*strp = kmalloc(len + 1, GFP_KERNEL);
+	if (!*strp)
+		return -ENOMEM;
+	memcpy(*strp, page, len);
+	(*strp)[len + 1] = '\0';
+
+	return count;
+}
+
+static ssize_t dovmgr_item_path_show(struct config_item *citem, char *page)
+{
+	struct dovmgr_item *item = to_dovmgr_item(citem);
+	return dovmgr_item_str_show(item, page, &item->path);
+}
+
+static ssize_t dovmgr_item_path_store(struct config_item *citem,
+		const char *page, size_t count)
+{
+	struct dovmgr_item *item = to_dovmgr_item(citem);
+	return dovmgr_item_str_store(item, page, count, &item->path);
+}
+
+static ssize_t dovmgr_item_enable_show(struct config_item *citem, char *page)
+{
+	struct dovmgr_item *item = to_dovmgr_item(citem);
+	return snprintf(page, PAGE_SIZE, "%u\n", !!item->enable);
+}
+
+static ssize_t dovmgr_item_enable_store(struct config_item *citem,
+		const char *page, size_t count)
+{
+	struct dovmgr_item *item = to_dovmgr_item(citem);
+	int ret;
+	unsigned int val;
+
+	ret = kstrtouint(page, 0, &val);
+	if (ret != 0)
+		return ret;
+
+	ret = dovmgr_item_set_enable(item, !!val);
+	if (ret != 0)
+		return ret;
+
+	return count;
+}
+
+static ssize_t dovmgr_item_overlay_show(struct config_item *citem, char *page)
+{
+	struct dovmgr_item *item = to_dovmgr_item(citem);
+	ssize_t ret;
+
+	mutex_lock(&item->dev_item_mutex);
+	ret = snprintf(page, PAGE_SIZE, "%s\n", item->overlay_name ?
+			item->overlay_name : "");
+	mutex_unlock(&item->dev_item_mutex);
+	return ret;
+};
+
+
+static ssize_t dovmgr_item_overlay_store(struct config_item *citem,
+		const char *page, size_t count)
+{
+	struct dovmgr_item *item = to_dovmgr_item(citem);
+	ssize_t ret;
+
+	mutex_lock(&item->dev_item_mutex);
+	kfree(item->overlay_name);
+	item->overlay_name = kstrndup(page, PAGE_SIZE, GFP_KERNEL);
+	if (!item->overlay_name)
+		ret = -ENOMEM;
+	else
+		ret = count;
+	mutex_unlock(&item->dev_item_mutex);
+	return ret;
+}
+
+static ssize_t dovmgr_item_status_show(struct config_item *citem, char *page)
+{
+	struct dovmgr_item *item = to_dovmgr_item(citem);
+	struct dovmgr_dev_item *ditem;
+	char *p, *e;
+	int len;
+
+	p = page;
+	e = page + PAGE_SIZE;
+
+	mutex_lock(&item->dev_item_mutex);
+	list_for_each_entry(ditem, &item->dev_item_list, node) {
+		len = snprintf(p, e - p, "%s:%s:%d\n",
+			kobject_name(&ditem->dev->kobj),
+			of_node_full_name(ditem->dev->of_node),
+			ditem->overlay_id);
+		p += len;
+		if (p >= e - 1)
+			break;
+	}
+	mutex_unlock(&item->dev_item_mutex);
+
+	return p - page;
+}
+
+CONFIGFS_ATTR(dovmgr_item_, path);
+CONFIGFS_ATTR_RO(dovmgr_item_, status);
+CONFIGFS_ATTR(dovmgr_item_, enable);
+CONFIGFS_ATTR(dovmgr_item_, overlay);
+
+#if IS_ENABLED(CONFIG_PCI)
+static ssize_t dovmgr_item_pci_device_show(struct config_item *citem,
+		char *page)
+{
+	struct dovmgr_item *item = to_dovmgr_item(citem);
+	return snprintf(page, PAGE_SIZE, "0x%04x\n", item->pci.device);
+}
+
+static ssize_t dovmgr_item_pci_device_store(struct config_item *citem,
+		const char *page, size_t count)
+{
+	struct dovmgr_item *item = to_dovmgr_item(citem);
+	int ret;
+	unsigned int val;
+
+	ret = kstrtouint(page, 0, &val);
+	if (ret != 0)
+		return ret;
+	item->pci.device = val;
+	return count;
+}
+
+static ssize_t dovmgr_item_pci_vendor_show(struct config_item *citem,
+		char *page)
+{
+	struct dovmgr_item *item = to_dovmgr_item(citem);
+	return snprintf(page, PAGE_SIZE, "0x%04x\n", item->pci.vendor);
+}
+
+static ssize_t dovmgr_item_pci_vendor_store(struct config_item *citem,
+		const char *page, size_t count)
+{
+	struct dovmgr_item *item = to_dovmgr_item(citem);
+	int ret;
+	unsigned int val;
+
+	/* cannot modify when item is enabled */
+	if (item->enable)
+		return -EBUSY;
+
+	ret = kstrtouint(page, 0, &val);
+	if (ret != 0)
+		return ret;
+	item->pci.vendor = val;
+	return count;
+}
+
+static ssize_t dovmgr_item_pci_subdevice_show(struct config_item *citem,
+		char *page)
+{
+	struct dovmgr_item *item = to_dovmgr_item(citem);
+	return snprintf(page, PAGE_SIZE, "0x%08x\n", item->pci.subdevice);
+}
+
+static ssize_t dovmgr_item_pci_subdevice_store(struct config_item *citem,
+		const char *page, size_t count)
+{
+	struct dovmgr_item *item = to_dovmgr_item(citem);
+	int ret;
+	unsigned int val;
+
+	/* cannot modify when item is enabled */
+	if (item->enable)
+		return -EBUSY;
+
+	ret = kstrtouint(page, 0, &val);
+	if (ret != 0)
+		return ret;
+	item->pci.subdevice = val;
+	return count;
+}
+
+static ssize_t dovmgr_item_pci_subvendor_show(struct config_item *citem,
+		char *page)
+{
+	struct dovmgr_item *item = to_dovmgr_item(citem);
+	return snprintf(page, PAGE_SIZE, "0x%08x\n", item->pci.subvendor);
+}
+
+static ssize_t dovmgr_item_pci_subvendor_store(struct config_item *citem,
+		const char *page, size_t count)
+{
+	struct dovmgr_item *item = to_dovmgr_item(citem);
+	int ret;
+	unsigned int val;
+
+	/* cannot modify when item is enabled */
+	if (item->enable)
+		return -EBUSY;
+
+	ret = kstrtouint(page, 0, &val);
+	if (ret != 0)
+		return ret;
+	item->pci.subvendor = val;
+	return count;
+}
+
+static ssize_t dovmgr_item_pci_class_show(struct config_item *citem, char *page)
+{
+	struct dovmgr_item *item = to_dovmgr_item(citem);
+	return snprintf(page, PAGE_SIZE, "0x%04x\n", item->pci.class);
+}
+
+static ssize_t dovmgr_item_pci_class_store(struct config_item *citem,
+		const char *page, size_t count)
+{
+	struct dovmgr_item *item = to_dovmgr_item(citem);
+	int ret;
+	unsigned int val;
+
+	/* cannot modify when item is enabled */
+	if (item->enable)
+		return -EBUSY;
+
+	ret = kstrtouint(page, 0, &val);
+	if (ret != 0)
+		return ret;
+	item->pci.class = val;
+	return count;
+}
+
+static ssize_t dovmgr_item_pci_class_mask_show(struct config_item *citem,
+		char *page)
+{
+	struct dovmgr_item *item = to_dovmgr_item(citem);
+	return snprintf(page, PAGE_SIZE, "0x%04x\n", item->pci.class_mask);
+}
+
+static ssize_t dovmgr_item_pci_class_mask_store(struct config_item *citem,
+		const char *page, size_t count)
+{
+	struct dovmgr_item *item = to_dovmgr_item(citem);
+	int ret;
+	unsigned int val;
+
+	/* cannot modify when item is enabled */
+	if (item->enable)
+		return -EBUSY;
+
+	ret = kstrtouint(page, 0, &val);
+	if (ret != 0)
+		return ret;
+	item->pci.class_mask = val;
+	return count;
+}
+
+CONFIGFS_ATTR(dovmgr_item_pci_, device);
+CONFIGFS_ATTR(dovmgr_item_pci_, vendor);
+CONFIGFS_ATTR(dovmgr_item_pci_, subdevice);
+CONFIGFS_ATTR(dovmgr_item_pci_, subvendor);
+CONFIGFS_ATTR(dovmgr_item_pci_, class);
+CONFIGFS_ATTR(dovmgr_item_pci_, class_mask);
+#endif
+
+#if IS_ENABLED(CONFIG_USB)
+static ssize_t dovmgr_item_usb_idProduct_show(struct config_item *citem,
+		char *page)
+{
+	struct dovmgr_item *item = to_dovmgr_item(citem);
+	return snprintf(page, PAGE_SIZE, "0x%04x\n",
+			item->usb.idProduct);
+}
+
+static ssize_t dovmgr_item_usb_idProduct_store(struct config_item *citem,
+		const char *page, size_t count)
+{
+	struct dovmgr_item *item = to_dovmgr_item(citem);
+	int ret;
+	unsigned int val;
+
+	/* cannot modify when item is enabled */
+	if (item->enable)
+		return -EBUSY;
+
+	ret = kstrtouint(page, 0, &val);
+	if (ret != 0)
+		return ret;
+	item->usb.idProduct = val;
+	return count;
+}
+
+static ssize_t dovmgr_item_usb_idVendor_show(struct config_item *citem,
+		char *page)
+{
+	struct dovmgr_item *item = to_dovmgr_item(citem);
+	return snprintf(page, PAGE_SIZE, "0x%04x\n",
+			item->usb.idVendor);
+}
+
+static ssize_t dovmgr_item_usb_idVendor_store(struct config_item *citem,
+		const char *page, size_t count)
+{
+	struct dovmgr_item *item = to_dovmgr_item(citem);
+	int ret;
+	unsigned int val;
+
+	/* cannot modify when item is enabled */
+	if (item->enable)
+		return -EBUSY;
+
+	ret = kstrtouint(page, 0, &val);
+	if (ret != 0)
+		return ret;
+	item->usb.idVendor = val;
+	return count;
+}
+
+CONFIGFS_ATTR(dovmgr_item_usb_, idProduct);
+CONFIGFS_ATTR(dovmgr_item_usb_, idVendor);
+#endif
+
+#if IS_ENABLED(CONFIG_PCI)
+static struct configfs_attribute *dovmgr_pci_attrs[] = {
+	&dovmgr_item_attr_path,
+	&dovmgr_item_attr_status,
+	&dovmgr_item_attr_enable,
+	&dovmgr_item_attr_overlay,
+	&dovmgr_item_pci_attr_device,
+	&dovmgr_item_pci_attr_vendor,
+	&dovmgr_item_pci_attr_subdevice,
+	&dovmgr_item_pci_attr_subvendor,
+	&dovmgr_item_pci_attr_class,
+	&dovmgr_item_pci_attr_class_mask,
+	NULL,
+};
+#endif
+
+#if IS_ENABLED(CONFIG_USB)
+static struct configfs_attribute *dovmgr_usb_attrs[] = {
+	&dovmgr_item_attr_path,
+	&dovmgr_item_attr_enable,
+	&dovmgr_item_attr_status,
+	&dovmgr_item_attr_overlay,
+	&dovmgr_item_usb_attr_idVendor,
+	&dovmgr_item_usb_attr_idProduct,
+	NULL,
+};
+#endif
+
+static void dovmgr_release(struct config_item *cfsitem)
+{
+	struct dovmgr_item *item = to_dovmgr_item(cfsitem);
+
+	/* disable item (this removes the overlay and all) */
+	dovmgr_item_set_enable(item, false);
+
+	kfree(item->path);
+	kfree(item);
+}
+
+static struct configfs_item_operations dovmgr_item_ops = {
+	.release		= dovmgr_release,
+};
+
+#if IS_ENABLED(CONFIG_PCI)
+static struct config_item_type dovmgr_pci_item_type = {
+	.ct_item_ops	= &dovmgr_item_ops,
+	.ct_attrs	= dovmgr_pci_attrs,
+	.ct_owner	= THIS_MODULE,
+};
+#endif
+
+#if IS_ENABLED(CONFIG_USB)
+static struct config_item_type dovmgr_usb_item_type = {
+	.ct_item_ops	= &dovmgr_item_ops,
+	.ct_attrs	= dovmgr_usb_attrs,
+	.ct_owner	= THIS_MODULE,
+};
+#endif
+
+static struct config_item *dovmgr_group_make_item(
+		struct config_group *group, const char *name,
+		enum dovmgr_type type)
+{
+	struct dovmgr_item *item;
+	struct config_item_type *item_type;
+
+	switch (type) {
+#if IS_ENABLED(CONFIG_PCI)
+	case ITEM_PCI:
+		item_type = &dovmgr_pci_item_type;
+		break;
+#endif
+#if IS_ENABLED(CONFIG_USB)
+	case ITEM_USB:
+		item_type = &dovmgr_usb_item_type;
+		break;
+#endif
+	default:
+		return ERR_PTR(-EINVAL);
+	};
+
+	item = kzalloc(sizeof(*item), GFP_KERNEL);
+	if (!item)
+		return ERR_PTR(-ENOMEM);
+
+	item->type = type;
+	item->enable = false;
+	mutex_init(&item->dev_item_mutex);
+	INIT_LIST_HEAD(&item->dev_item_list);
+
+	switch (type) {
+#if IS_ENABLED(CONFIG_PCI)
+	case ITEM_PCI:
+		/* default for matching device/vendor */
+		item->pci.vendor = PCI_ANY_ID;
+		item->pci.device = PCI_ANY_ID;
+		item->pci.subvendor = PCI_ANY_ID;
+		item->pci.subdevice = PCI_ANY_ID;
+		item->pci.class = 0;
+		item->pci.class_mask = 0;
+		break;
+#endif
+#if IS_ENABLED(CONFIG_USB)
+	case ITEM_USB:
+		/* default */
+		item->usb.match_flags = USB_DEVICE_ID_MATCH_DEVICE;
+		break;
+#endif
+	default:
+		return ERR_PTR(-EINVAL);
+	};
+
+	config_item_init_type_name(&item->item, name, item_type);
+	return &item->item;
+}
+
+#if IS_ENABLED(CONFIG_PCI)
+static struct config_item *dovmgr_group_pci_make_item(
+		struct config_group *group, const char *name)
+{
+	return dovmgr_group_make_item(group, name, ITEM_PCI);
+}
+#endif
+
+#if IS_ENABLED(CONFIG_USB)
+static struct config_item *dovmgr_group_usb_make_item(
+		struct config_group *group, const char *name)
+{
+	return dovmgr_group_make_item(group, name, ITEM_USB);
+}
+#endif
+
+static void dovmgr_group_drop_item(struct config_group *group,
+		struct config_item *cfsitem)
+{
+	struct dovmgr_item *item = to_dovmgr_item(cfsitem);
+
+	switch (item->type) {
+#if IS_ENABLED(CONFIG_PCI)
+	case ITEM_PCI:
+		break;
+#endif
+#if IS_ENABLED(CONFIG_USB)
+	case ITEM_USB:
+		break;
+#endif
+	default:
+		break;
+	}
+	config_item_put(&item->item);
+}
+
+#if IS_ENABLED(CONFIG_PCI)
+static struct configfs_group_operations dovmgr_pci_group_ops = {
+	.make_item	= dovmgr_group_pci_make_item,
+	.drop_item	= dovmgr_group_drop_item,
+};
+
+static struct config_item_type dovmgr_pci_type = {
+	.ct_group_ops   = &dovmgr_pci_group_ops,
+	.ct_owner       = THIS_MODULE,
+};
+#endif
+
+#if IS_ENABLED(CONFIG_USB)
+static struct configfs_group_operations dovmgr_usb_group_ops = {
+	.make_item	= dovmgr_group_usb_make_item,
+	.drop_item	= dovmgr_group_drop_item,
+};
+
+static struct config_item_type dovmgr_usb_type = {
+	.ct_group_ops   = &dovmgr_usb_group_ops,
+	.ct_owner       = THIS_MODULE,
+};
+#endif
+
+static struct configfs_group_operations dovmgr_ops = {
+	/* empty - we don't allow anything to be created */
+};
+
+static struct config_item_type dovmgr_type = {
+	.ct_group_ops   = &dovmgr_ops,
+	.ct_owner       = THIS_MODULE,
+};
+
+struct config_group *dovmgr_def_groups[] = {
+#if IS_ENABLED(CONFIG_PCI)
+	&dovmgr_pci_group,
+#endif
+#if IS_ENABLED(CONFIG_USB)
+	&dovmgr_usb_group,
+#endif
+	NULL
+};
+
+static struct configfs_subsystem dovmgr_subsys = {
+	.su_group = {
+		.cg_item = {
+			.ci_namebuf = "dovmgr",
+			.ci_type = &dovmgr_type,
+		},
+		.default_groups = dovmgr_def_groups,
+	},
+	.su_mutex = __MUTEX_INITIALIZER(dovmgr_subsys.su_mutex),
+};
+
+#if IS_ENABLED(CONFIG_PCI)
+static int pci_dev_instantiate(struct pci_dev *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct device *bus_dev;
+	struct of_changeset cset;
+	struct device_node *np, *npb;
+	int ret;
+
+	npb = NULL;
+
+	/* already instantiated */
+	if (dev->of_node) {
+		pr_debug("%s: dev=%s of_node=%s\n", __func__,
+			kobject_name(&dev->kobj),
+			of_node_full_name(dev->of_node));
+		return 0;
+	}
+
+	bus_dev = &pdev->bus->dev;
+
+	pr_debug("%s: %s: %02x:%02x.%02x - node %s%s\n", __func__,
+			kobject_name(&dev->kobj),
+			pdev->bus->number, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
+			bus_dev->of_node ? of_node_full_name(bus_dev->of_node) : "<NULL>",
+			pci_is_bridge(pdev) ? " bridge" : "");
+
+	/* to create the node, the bus must be present */
+	if (!bus_dev->of_node) {
+		pr_err("%s: No node for %s because no bus device node\n",
+			__func__, kobject_name(&dev->kobj));
+		return 0;
+	}
+
+	of_changeset_init(&cset);
+
+	np = of_changeset_create_device_node(&cset, bus_dev->of_node,
+		"%s/pci-%04x-%02x-%02x.%d",
+		of_node_full_name(bus_dev->of_node),
+		pci_domain_nr(pdev->bus), pdev->bus->number,
+		PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
+	if (IS_ERR(np)) {
+		ret = PTR_ERR(np);
+		goto out_cset_fail;
+	}
+
+	ret = of_changeset_add_property_stringf(&cset, np, "compatible",
+			"pciclass,%04x", (pdev->class >> 8) & 0xffffff);
+	if (ret != 0)
+		goto out_cset_fail;
+
+	ret = of_changeset_add_property_u32(&cset, np, "vendor",
+			pdev->vendor);
+	if (ret != 0)
+		goto out_cset_fail;
+
+	ret = of_changeset_add_property_u32(&cset, np, "device",
+			pdev->device);
+	if (ret != 0)
+		goto out_cset_fail;
+
+	ret = of_changeset_add_property_string(&cset, np, "status", "okay");
+	if (ret != 0)
+		goto out_cset_fail;
+
+	ret = of_changeset_add_property_bool(&cset, np, "auto-generated");
+	if (ret != 0)
+		goto out_cset_fail;
+
+	ret = of_changeset_attach_node(&cset, np);
+	if (ret != 0)
+		goto out_cset_fail;
+
+	/* are we creating a bridge; swell */
+	npb = NULL;
+	if (pci_is_bridge(pdev) && !pdev->subordinate->dev.of_node) {
+
+		pr_debug("%s: %s: bus->dev=%s subordinate=%s\n", __func__,
+			kobject_name(&dev->kobj),
+			kobject_name(&pdev->bus->dev.kobj),
+			kobject_name(&pdev->subordinate->dev.kobj));
+
+		npb = of_changeset_create_device_node(&cset, bus_dev->of_node,
+			"%s/pci-%04x-%02x",
+			of_node_full_name(bus_dev->of_node),
+			pci_domain_nr(pdev->subordinate),
+			pdev->subordinate->number);
+		if (IS_ERR(npb)) {
+			ret = PTR_ERR(npb);
+			goto out_cset_fail;
+		}
+
+		ret = of_changeset_add_property_string(&cset, npb, "compatible", "generic,pci-bus");
+		if (ret != 0)
+			goto out_cset_fail;
+
+		ret = of_changeset_add_property_string(&cset, npb, "device_type", "pci");
+		if (ret != 0)
+			goto out_cset_fail;
+
+		ret = of_changeset_add_property_string(&cset, npb, "status", "okay");
+		if (ret != 0)
+			goto out_cset_fail;
+
+		ret = of_changeset_add_property_bool(&cset, npb, "auto-generated");
+		if (ret != 0)
+			goto out_cset_fail;
+
+		ret = of_changeset_attach_node(&cset, npb);
+		if (ret != 0)
+			goto out_cset_fail;
+	}
+
+	ret = of_changeset_apply(&cset);
+	if (ret != 0)
+		goto out_cset_fail;
+
+	/* permanently commit */
+	of_changeset_destroy(&cset);
+
+	/* bind the node to the device */
+	dev->of_node = np;
+	ret = sysfs_create_link(&dev->kobj, &dev->of_node->kobj,
+			"of_node");
+	if (ret)
+		pr_warn("%s: %s Error %d creating of_node link\n",
+				__func__, kobject_name(&dev->kobj), ret);
+
+	if (npb) {
+		pdev->subordinate->dev.of_node = npb;
+		ret = sysfs_create_link(&pdev->subordinate->dev.kobj, &npb->kobj,
+				"of_node");
+		if (ret)
+			pr_warn("%s: %s Error %d creating of_node link\n",
+					__func__, kobject_name(&dev->kobj), ret);
+	}
+
+
+	return 0;
+
+out_cset_fail:
+	pr_err("%s: %s Failed to apply changeset (err=%d)\n", __func__,
+		kobject_name(&dev->kobj), ret);
+	of_changeset_destroy(&cset);
+	return ret;
+}
+
+static int pci_dev_uninstantiate(struct pci_dev *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct device_node *np, *npb;
+	struct of_changeset cset;
+	int ret;
+
+	/* device node must exist */
+	np = dev->of_node;
+	if (!np)
+		return 0;
+
+	/* and the auto-generated property */
+	if (!of_property_read_bool(np, "auto-generated"))
+		return 0;
+
+	of_changeset_init(&cset);
+
+	ret = of_changeset_detach_node(&cset, np);
+	if (ret != 0)
+		goto out_cset_fail;
+
+	npb = NULL;
+	if (pci_is_bridge(pdev))
+		npb = pdev->subordinate->dev.of_node;
+
+	if (npb != NULL) {
+		ret = of_changeset_detach_node(&cset, npb);
+		if (ret != 0)
+			goto out_cset_fail;
+	}
+
+	ret = of_changeset_apply(&cset);
+	if (ret != 0)
+		goto out_cset_fail;
+
+	dev->of_node = NULL;
+	if (npb != NULL)
+		pdev->subordinate->dev.of_node = NULL;
+
+	pr_debug("%s: %s: %02x:%02x.%02x\n", __func__,
+			kobject_name(&dev->kobj),
+			pdev->bus->number, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
+
+	/* TODO iterate over the properties and free */
+
+	return 0;
+
+out_cset_fail:
+	of_changeset_destroy(&cset);
+
+	return ret;
+}
+
+static int dovmgr_pci_notify(struct notifier_block *nb,
+				unsigned long action, void *arg)
+{
+	int ret;
+
+	if (action == BUS_NOTIFY_ADD_DEVICE)
+		pci_dev_instantiate(to_pci_dev(arg));
+
+	ret = dovmgr_notifier_action(&dovmgr_pci_group, action, arg,
+			dovmgr_pci_item_match, dovmgr_item_notify);
+
+	if (action == BUS_NOTIFY_REMOVED_DEVICE)
+		pci_dev_uninstantiate(to_pci_dev(arg));
+
+	return ret;
+}
+
+static struct notifier_block dovmgr_pci_notifier = {
+	.notifier_call = dovmgr_pci_notify,
+};
+
+static int pci_instantiate_iterator(struct device *dev, void *data)
+{
+	return pci_dev_instantiate(to_pci_dev(dev));
+}
+
+static int dovmgr_pci_init(void)
+{
+	int ret;
+
+	config_group_init_type_name(&dovmgr_pci_group, "pci", &dovmgr_pci_type);
+	ret = bus_register_notifier(&pci_bus_type, &dovmgr_pci_notifier);
+	if (ret != 0) {
+		pr_err("%s: bus_register_notifier() failed\n", __func__);
+		return ret;
+	}
+
+	ret = bus_for_each_dev(&pci_bus_type, NULL, NULL,
+			pci_instantiate_iterator);
+	if (ret != 0) {
+		pr_err("%s: bus_for_each_dev() failed\n", __func__);
+		return ret;
+	}
+
+	return 0;
+}
+
+static void dovmgr_pci_cleanup(void)
+{
+	bus_unregister_notifier(&pci_bus_type, &dovmgr_pci_notifier);
+}
+#endif
+
+#if IS_ENABLED(CONFIG_USB)
+static int dovmgr_usb_notify(struct notifier_block *nb,
+				unsigned long action, void *arg)
+{
+	return dovmgr_notifier_action(&dovmgr_usb_group, action, arg,
+			dovmgr_usb_item_match, dovmgr_item_notify);
+}
+
+static struct notifier_block dovmgr_usb_notifier = {
+	.notifier_call = dovmgr_usb_notify,
+};
+
+static int dovmgr_usb_init(void)
+{
+	int ret;
+
+	config_group_init_type_name(&dovmgr_usb_group, "usb", &dovmgr_usb_type);
+	ret = bus_register_notifier(&usb_bus_type, &dovmgr_usb_notifier);
+	if (ret != 0) {
+		pr_err("%s: bus_register_notifier() failed\n", __func__);
+		return ret;
+	}
+	return 0;
+}
+
+static void dovmgr_usb_cleanup(void)
+{
+	bus_unregister_notifier(&usb_bus_type, &dovmgr_usb_notifier);
+}
+#endif
+
+static int __init dovmgr_init(void)
+{
+	int ret;
+
+	config_group_init(&dovmgr_subsys.su_group);
+#if IS_ENABLED(CONFIG_PCI)
+	configfs_add_default_group(&dovmgr_pci_group,
+			&dovmgr_subsys.su_group);
+#endif
+#if IS_ENABLED(CONFIG_USB)
+	configfs_add_default_group(&dovmgr_usb_group,
+			&dovmgr_subsys.su_group);
+#endif
+
+#if IS_ENABLED(CONFIG_PCI)
+	ret = dovmgr_pci_init();
+	if (ret != 0)
+		goto err_no_pci_init;
+#endif
+#if IS_ENABLED(CONFIG_USB)
+	ret = dovmgr_usb_init();
+	if (ret != 0)
+		goto err_no_usb_init;
+#endif
+
+	ret = configfs_register_subsystem(&dovmgr_subsys);
+	if (ret != 0) {
+		pr_err("%s: failed to register subsys\n", __func__);
+		goto err_no_configfs;
+	}
+	pr_info("%s: OK\n", __func__);
+	return 0;
+
+err_no_configfs:
+#if IS_ENABLED(CONFIG_USB)
+	dovmgr_usb_cleanup();
+err_no_usb_init:
+#endif
+#if IS_ENABLED(CONFIG_PCI)
+	dovmgr_pci_cleanup();
+err_no_pci_init:
+#endif
+	return ret;
+}
+late_initcall(dovmgr_init);
diff --git a/drivers/misc/eeprom/at24.c b/drivers/misc/eeprom/at24.c
index 19c10dc..8339f21 100644
--- a/drivers/misc/eeprom/at24.c
+++ b/drivers/misc/eeprom/at24.c
@@ -600,7 +600,6 @@ static int at24_probe(struct i2c_client *client, const struct i2c_device_id *id)
 	struct at24_data *at24;
 	int err;
 	unsigned i, num_addresses;
-	u8 test_byte;
 
 	if (client->dev.platform_data) {
 		chip = *(struct at24_platform_data *)client->dev.platform_data;
@@ -761,18 +760,6 @@ static int at24_probe(struct i2c_client *client, const struct i2c_device_id *id)
 		}
 	}
 
-	i2c_set_clientdata(client, at24);
-
-	/*
-	 * Perform a one-byte test read to verify that the
-	 * chip is functional.
-	 */
-	err = at24_read(at24, 0, &test_byte, 1);
-	if (err) {
-		err = -ENODEV;
-		goto err_clients;
-	}
-
 	at24->nvmem_config.name = dev_name(&client->dev);
 	at24->nvmem_config.dev = &client->dev;
 	at24->nvmem_config.read_only = !writable;
@@ -794,6 +781,8 @@ static int at24_probe(struct i2c_client *client, const struct i2c_device_id *id)
 		goto err_clients;
 	}
 
+	i2c_set_clientdata(client, at24);
+
 	dev_info(&client->dev, "%u byte %s EEPROM, %s, %u bytes/write\n",
 		chip.byte_len, client->name,
 		writable ? "writable" : "read-only", at24->write_max);
diff --git b/drivers/misc/tieqep.c b/drivers/misc/tieqep.c
new file mode 100644
index 0000000..bb69ad4
--- /dev/null
+++ b/drivers/misc/tieqep.c
@@ -0,0 +1,754 @@
+/*
+ * TI eQEP driver for AM33xx devices
+ *
+ * Copyright (C) 2013 Nathaniel R. Lewis - http://teknoman117.wordpress.com/
+ * Copyright (C) 2015 SoftPLC Corporation, Dick Hollenbeck <dick@softplc.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ *
+ * sysfs entries
+ *	 - position = absolute - current position; relative - last latched value
+ *	 - mode => 0 - absolute; 1 - relative
+ *	 - period => sampling period for the hardware
+ *	 - enable => 0 - eQEP disabled, 1 - eQEP enabled
+ */
+
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/err.h>
+#include <linux/clk.h>
+#include <linux/pm_runtime.h>
+#include <linux/of_device.h>
+#include <linux/pinctrl/consumer.h>
+#include <linux/input.h>
+
+/* eQEP register offsets from its base IO address */
+#define QPOSCNT		0x0000
+#define QPOSINIT	0x0004
+#define QPOSMAX		0x0008
+#define QPOSCMP		0x000C
+#define QPOSILAT	0x0010
+#define QPOSSLAT	0x0014
+#define QPOSLAT		0x0018
+#define QUTMR		0x001C
+#define QUPRD		0x0020
+#define QWDTMR		0x0024
+#define QWDPRD		0x0026
+#define QDECCTL		0x0028
+#define QEPCTL		0x002A
+#define QCAPCTL		0x002C
+#define QPOSCTL		0x002E
+#define QEINT		0x0030
+#define QFLG		0x0032
+#define QCLR		0x0034
+#define QFRC		0x0036
+#define QEPSTS		0x0038
+#define QCTMR		0x003A
+#define QCPRD		0x003C
+#define QCTMRLAT	0x003E
+#define QCPRDLAT	0x0040
+#define QREVID		0x005C
+
+#if 0	/* if you wanted another way to modify IP registers... */
+typedef volatile u32	REG32;
+typedef volatile u16	REG16;
+struct EQEP_REGS {
+	REG32	q_poscnt;		/*	0x00	position counter */
+	REG32	q_posinit;		/*	0x04	position counter initialization */
+	REG32	q_posmax;		/*	0x08	maximum position count */
+	REG32	q_poscmp;		/*	0x0C	position compare */
+	REG32	q_posilat;		/*	0x10	index position latch */
+	REG32	q_posslat;		/*	0x14	strobe position latch */
+	REG32	q_poslat;		/*	0x18	position counter latch */
+	REG32	q_utmr;			/*	0x1C	unit timer */
+	REG32	q_uprd;			/*	0x20	unit period */
+	REG16	q_wdtmr;		/*	0x24	watchdog timer */
+	REG16	q_wdprd;		/*	0x26	watchdog period */
+	REG16	q_decctl;		/*	0x28	decoder control */
+	REG16	q_epctl;		/*	0x2A	control register */
+	REG16	q_capctl;		/*	0x2C	capture control */
+	REG16	q_posctl;		/*	0x2E	position compare control */
+	REG16	q_eint;			/*	0x30	interrupt enable */
+	REG16	q_flg;			/*	0x32	interrupt flag */
+	REG16	q_clr;			/*	0x34	interrupt clear */
+	REG16	q_frc;			/*	0x36	interrupt force */
+	REG16	q_epsts;		/*	0x38	status */
+	REG16	q_ctmr;			/*	0x3A	capture timer */
+	REG16	q_cprd;			/*	0x3C	capture period */
+	REG16	q_ctmrlat;		/*	0x3E	capture timer latch */
+	REG16	q_prdlat;		/*	0x40	capture period latch */
+	char	q_fill1[0x5c-0x40];
+	REG32	q_revid;		/*	0x5C	revision id */
+};
+#endif
+
+
+/* Bits for the QDECTL register */
+#define QSRC1		(1 << 15)
+#define QSRC0		(1 << 14)
+#define SOEN		(1 << 13)
+#define SPSEL		(1 << 12)
+#define XCR		(1 << 11)
+#define SWAP		(1 << 10)
+#define IGATE		(1 << 9)
+#define QAP		(1 << 8)
+#define QBP		(1 << 7)
+#define QIP		(1 << 6)
+#define QSP		(1 << 5)
+
+/* Bits for the QEPCTL register */
+#define FREESOFT1	(1 << 15)
+#define FREESOFT0	(1 << 14)
+#define PCRM1		(1 << 13)
+#define PCRM0		(1 << 12)
+#define SEI1		(1 << 11)
+#define SEI0		(1 << 10)
+#define IEI1		(1 << 9)
+#define IEI0		(1 << 8)
+#define SWI		(1 << 7)
+#define SEL		(1 << 6)
+#define IEL1		(1 << 5)
+#define IEL0		(1 << 4)
+#define PHEN		(1 << 3)
+#define QCLM		(1 << 2)
+#define UTE		(1 << 1)
+#define WDE		(1 << 0)
+
+/* Bits for the QCAPCTL register */
+#define CEN		(1 << 15)
+#define CCPS2		(1 << 6)
+#define CCPS0		(1 << 5)
+#define CCPS1		(1 << 4)
+#define UPPS3		(1 << 3)
+#define UPPS2		(1 << 2)
+#define UPPS1		(1 << 1)
+#define UPPS0		(1 << 0)
+
+/* Bits for the QPOSCTL register */
+#define PCSHDW		(1 << 15)
+#define PCLOAD		(1 << 14)
+#define PCPOL		(1 << 13)
+#define PCE		(1 << 12)
+#define PCSPW11		(1 << 11)
+#define PCSPW10		(1 << 10)
+#define PCSPW9		(1 << 9)
+#define PCSPW8		(1 << 8)
+#define PCSPW7		(1 << 7)
+#define PCSPW6		(1 << 6)
+#define PCSPW5		(1 << 5)
+#define PCSPW4		(1 << 4)
+#define PCSPW3		(1 << 3)
+#define PCSPW2		(1 << 2)
+#define PCSPW1		(1 << 1)
+#define PCSPW0		(1 << 0)
+
+/* Bits for the interrupt registers */
+#define EQEP_INTERRUPT_MASK	0x0FFF
+#define UTOF			(1 << 11)
+
+/* Bits to control the clock in the PWMSS subsystem */
+#define PWMSS_EQEPCLK_EN	BIT(4)
+#define PWMSS_EQEPCLK_STOP_REQ	BIT(5)
+#define PWMSS_EQEPCLK_EN_ACK	BIT(4)
+
+/*
+ * Modes for the eQEP unit
+ *	Absolute - the position entry represents the current position of the encoder.
+ *		   Poll this value and it will be notified every period nanoseconds
+ *	Relative - the position entry represents the last latched position of the encoder
+ *		   This value is latched every period nanoseconds and the internal counter
+ *		   is subsequenty reset
+ */
+#define TIEQEP_MODE_ABSOLUTE	0
+#define TIEQEP_MODE_RELATIVE	1
+
+/* Structure defining the characteristics of the eQEP unit */
+struct eqep_chip
+{
+	/* Platform device for this eQEP unit */
+	struct platform_device *pdev;
+
+	/* Pointer to the base of the memory of the eQEP unit */
+	void __iomem	*mmio_base;
+
+	/* SYSCLKOUT to the eQEP unit */
+	u32		clk_rate;
+
+	/* IRQ for the eQEP unit */
+	u16		irq;
+
+	/* Mode of the eQEP unit */
+	u8		op_mode;
+
+	/* work stuct for the notify userspace work */
+	struct work_struct notify_work;
+
+	/* Backup for driver suspension */
+	u16		prior_qepctl;
+	u16		prior_qeint;
+};
+
+/* Notify userspace work */
+static void notify_handler(struct work_struct *work)
+{
+	/* Get a reference to the eQEP driver */
+	struct eqep_chip *eqep = container_of(work, struct eqep_chip, notify_work);
+
+	/* Notify the userspace */
+	sysfs_notify(&eqep->pdev->dev.kobj, NULL, "position");
+}
+
+/* eQEP Interrupt handler */
+static irqreturn_t eqep_irq_handler(int irq, void *dev_id)
+{
+	/* Get the instance information */
+	struct platform_device	*pdev = dev_id;
+	struct eqep_chip	*eqep = platform_get_drvdata(pdev);
+
+	/* Get the interrupt flags */
+	u16 iflags = readw(eqep->mmio_base + QFLG) & EQEP_INTERRUPT_MASK;
+
+	/* Check the interrupt source(s) */
+	if (iflags & UTOF) {
+		/* Handle the unit timer overflow interrupt by notifying any potential pollers */
+		schedule_work(&eqep->notify_work);
+	}
+
+	/* Clear interrupt flags (write back triggered flags to the clear register) */
+	writew(iflags, eqep->mmio_base + QCLR);
+
+	/* Return that the IRQ was handled successfully */
+	return IRQ_HANDLED;
+}
+
+/* Function to read whether the eQEP unit is enabled or disabled */
+static ssize_t eqep_get_enabled(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	/* Get the instance structure */
+	struct eqep_chip *eqep = dev_get_drvdata(dev);
+	u16 enabled = 0;
+
+	/* Increment the device usage count and run pm_runtime_resume() */
+	pm_runtime_get_sync(dev);
+
+	/* Read the qep control register and mask all but the enabled bit */
+	enabled = readw(eqep->mmio_base + QEPCTL) & PHEN;
+
+	/* Return the target in string format */
+	return sprintf(buf, "%u\n", (enabled) ? 1 : 0);
+}
+
+/* Function to set if the eQEP is enabled */
+static ssize_t eqep_set_enabled(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+{
+	/* Get the instance structure */
+	int	rc;
+	u16	val;
+	u8	enabled;
+	struct eqep_chip *eqep = dev_get_drvdata(dev);
+
+	/* Convert the input string to an 8 bit uint */
+	if ((rc = kstrtou8(buf, 0, &enabled)))
+		return rc;
+
+	/* Increment the device usage count and run pm_runtime_resume() */
+	pm_runtime_get_sync(dev);
+	/* Get the existing state of QEPCTL */
+	val = readw(eqep->mmio_base + QEPCTL);
+
+	/* If we passed a number that is not 0, enable the eQEP */
+	if (enabled)
+		/* Enable the eQEP (Set PHEN in QEPCTL) */
+		val |= PHEN;
+	else
+		/* Disable the eQEP (Clear PHEN in QEPCTL) */
+		val &= ~PHEN;
+
+	/* Write flags back to control register */
+	writew(val, eqep->mmio_base + QEPCTL);
+
+	/* Return buffer length consumed (all) */
+	return count;
+}
+
+/* Function to read the current position of the eQEP */
+static ssize_t eqep_get_position(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct eqep_chip *eqep = dev_get_drvdata(dev);
+
+	s32 position = 0;
+	/* Increment the device usage count and run pm_runtime_resume() */
+	pm_runtime_get_sync(dev);
+
+	if (eqep->op_mode == TIEQEP_MODE_ABSOLUTE) {
+		position = readl(eqep->mmio_base + QPOSCNT);
+	} else if (eqep->op_mode == TIEQEP_MODE_RELATIVE) {
+		/* in relative mode, use the last latched value of the eQEP hardware */
+		position = readl(eqep->mmio_base + QPOSLAT);
+		dev_dbg(dev, "get_position:0x%08x\n", position);
+	}
+
+	return sprintf(buf, "%d\n", position);
+}
+
+/* Function to set the position of the eQEP hardware */
+static ssize_t eqep_set_position(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+{
+	int rc;
+	s32 position;
+	struct eqep_chip *eqep = dev_get_drvdata(dev);
+
+	if ((rc = kstrtos32(buf, 0, &position)))
+		return rc;
+
+	/* Increment the device usage count and run pm_runtime_resume() */
+	pm_runtime_get_sync(dev);
+	/*
+	 * If we are in absolute mode, set the position of the encoder,
+	 * discard relative mode because thats pointless
+	 */
+	if (eqep->op_mode == TIEQEP_MODE_ABSOLUTE) {
+		/* If absolute mode, set the current value of the eQEP hardware */
+		writel(position, eqep->mmio_base + QPOSCNT);
+	}
+
+	/* Return buffer length consumed (all) */
+	return count;
+}
+
+/* Function to read the period of the unit time event timer */
+static ssize_t eqep_get_timer_period(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct eqep_chip *eqep = dev_get_drvdata(dev);
+	u64 period;
+
+	/* Increment the device usage count and run pm_runtime_resume() */
+	pm_runtime_get_sync(dev);
+	/* Convert from counts per interrupt back into period_ns */
+	period = readl(eqep->mmio_base + QUPRD);
+	period = period * NSEC_PER_SEC;
+	do_div(period, eqep->clk_rate);
+
+	/* Otherwise write out the data */
+	return sprintf(buf, "%llu\n", period);
+}
+
+/* Function to set the unit timer period.  0 = off, greater than zero sets the period */
+static ssize_t eqep_set_timer_period(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+{
+	int	rc;
+	u16	tmp;
+	u64	period;
+
+	struct eqep_chip *eqep = dev_get_drvdata(dev);
+
+	if ((rc = kstrtou64(buf, 0, &period)))
+		return rc;
+
+	/* Increment the device usage count and run pm_runtime_resume() */
+	pm_runtime_get_sync(dev);
+	/* Disable the unit timer before modifying its period register */
+	tmp = readw(eqep->mmio_base + QEPCTL);
+	tmp &= ~(UTE | QCLM);
+	writew(tmp, eqep->mmio_base + QEPCTL);
+
+	/* Zero the unit timer counter register */
+	writel(0, eqep->mmio_base + QUTMR);
+
+	/* If the timer is enabled (a non-zero period has been passed) */
+	if (period) {
+		/* update the period */
+		period = period * eqep->clk_rate;
+		do_div(period, NSEC_PER_SEC);
+
+		dev_dbg(dev, "eqep_set_timer_period:%llu\n", period);
+
+		writel(period, eqep->mmio_base + QUPRD);
+
+		/* Enable unit timer, and latch QPOSLAT to QPOSCNT on timer expiration */
+		tmp |= UTE | QCLM;
+		writew(tmp, eqep->mmio_base + QEPCTL);
+	}
+
+	return count;
+}
+
+/* Function to read the mode of the eQEP hardware */
+static ssize_t eqep_get_mode(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct eqep_chip *eqep = dev_get_drvdata(dev);
+
+	return sprintf(buf, "%u\n", eqep->op_mode);
+}
+
+/* Function to set the mode of the eQEP hardware */
+static ssize_t eqep_set_mode(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+{
+	int 	rc;
+	u16 	val;
+	u8	tmp_mode;
+	struct eqep_chip *eqep = dev_get_drvdata(dev);
+
+	if ((rc = kstrtou8(buf, 0, &tmp_mode)))
+		return rc;
+
+	dev_dbg(dev, "eqep_set_mode:%d\n", tmp_mode);
+
+	/* Increment the device usage count and run pm_runtime_resume() */
+	pm_runtime_get_sync(dev);
+	val = readw(eqep->mmio_base + QEPCTL);
+
+	if (tmp_mode == TIEQEP_MODE_ABSOLUTE) {
+		/*
+		 * In absolute mode, don't reset the hardware based on time,
+		 * so disable the unit timer position reset (Set PCRM[1:0] = 0)
+		 */
+		val &= ~(PCRM1 | PCRM0);
+
+		eqep->op_mode = TIEQEP_MODE_ABSOLUTE;
+	} else if (tmp_mode == TIEQEP_MODE_RELATIVE) {
+		/*
+		 * In relative mode, latch the value of the eQEP hardware on the
+		 * overflow of the unit timer.	So enable the unit timer position reset
+		 * (Set PCRM[1:0] = 3)
+		 */
+		val |= PCRM1 | PCRM0;
+
+		eqep->op_mode = TIEQEP_MODE_RELATIVE;
+	}
+
+	writew(val, eqep->mmio_base + QEPCTL);
+
+	return count;
+}
+
+/* Bind read/write functions to sysfs entries */
+static DEVICE_ATTR(enabled,	0644, eqep_get_enabled,		eqep_set_enabled);
+static DEVICE_ATTR(position,	0644, eqep_get_position,	eqep_set_position);
+static DEVICE_ATTR(period,	0644, eqep_get_timer_period,	eqep_set_timer_period);
+static DEVICE_ATTR(mode,	0644, eqep_get_mode,		eqep_set_mode);
+
+/* Array holding all of the sysfs entries */
+static const struct attribute *eqep_attrs[] = {
+	&dev_attr_enabled.attr,
+	&dev_attr_position.attr,
+	&dev_attr_period.attr,
+	&dev_attr_mode.attr,
+	NULL,
+};
+
+/* Driver function group */
+static const struct attribute_group eqep_device_attr_group = {
+	.attrs = (struct attribute **) eqep_attrs,
+};
+
+/* Driver compatibility list */
+static struct of_device_id eqep_of_match[] =
+{
+	{ .compatible = "ti,am33xx-eqep" },
+	{ }
+};
+
+/* Register our compatibilities for device trees */
+MODULE_DEVICE_TABLE(of, eqep_of_match);
+
+/* Create an instance of the eQEP driver */
+static int eqep_probe(struct platform_device *pdev)
+{
+	struct resource	 *r;
+	struct clk	 *clk;
+	struct eqep_chip *eqep;
+	struct pinctrl	 *pinctrl;
+
+	u64	period;
+	u16	status;
+	u32	value;
+
+	dev_info(&pdev->dev, "ver. 1.0\n");
+
+	/* Select pins provided through the device tree */
+	pinctrl = devm_pinctrl_get_select_default(&pdev->dev);
+	if (IS_ERR(pinctrl))
+	{
+		dev_warn(&pdev->dev, "unable to select pin group\n");
+	}
+
+	/* Allocate a eqep_driver object */
+	eqep = devm_kzalloc(&pdev->dev, sizeof(struct eqep_chip), GFP_KERNEL);
+	if (!eqep) {
+		dev_err(&pdev->dev, "failed to allocate memory\n");
+		return -ENOMEM;
+	}
+
+	/* Get a handle to the system clock object */
+	clk = devm_clk_get(pdev->dev.parent, "fck");
+	if (IS_ERR(clk)) {
+		dev_err(&pdev->dev, "failed to get clock\n");
+		return PTR_ERR(clk);
+	}
+
+	/* Get the frequency of the system clock */
+	eqep->clk_rate = clk_get_rate(clk);
+	if (!eqep->clk_rate) {
+		dev_err(&pdev->dev, "failed to get clock rate\n");
+		return -EINVAL;
+	}
+
+	/* Get a resource containing the IRQ for this eQEP controller */
+	r = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
+	if (unlikely(!r)) {
+		dev_err(&pdev->dev, "Invalid IRQ resource\n");
+		return -ENODEV;
+	}
+
+	/* Store the irq */
+	eqep->irq = r->start;
+
+	/* Get a resource containing the requested (from DT) memory address and range of eQEP controller */
+	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!r) {
+		dev_err(&pdev->dev, "no memory resource defined\n");
+		return -ENODEV;
+	}
+
+	/* Remap the eQEP controller memory into our own memory space */
+	eqep->mmio_base = devm_ioremap_resource(&pdev->dev, r);
+	if (IS_ERR(eqep->mmio_base))
+		return PTR_ERR(eqep->mmio_base);
+
+	/* Store the platform device in our eQEP data structure for later usage */
+	eqep->pdev = pdev;
+
+	/* Subscribe to the eQEP interrupt */
+	if (request_irq(eqep->irq, eqep_irq_handler, IRQF_IRQPOLL, "eqep_interrupt", pdev))
+	{
+		dev_err(&pdev->dev, "unable to request irq for eQEP\n");
+		return -ENODEV;
+	}
+
+	/* Register controls to sysfs */
+	if (sysfs_create_group(&pdev->dev.kobj, &eqep_device_attr_group))
+	{
+		dev_err(&pdev->dev, "sysfs creation failed\n");
+		return -EINVAL;
+	}
+
+	/* set QDECCTL */
+	status = 0;	/* default to Quadrature count mode, QSRC1 & QSRC0 = 0 */
+
+	/* set QSRC1 & QSRC0 bits, one of 4 count_modes. */
+	if (!of_property_read_u32(pdev->dev.of_node, "count_mode", &value) && value <= 3) {
+		status |= value << 14;
+
+		/*
+		 * in count up or count down mode, count on rising edge only
+		 * not on both edges.
+		 */
+		if (value >= 2)
+			status |= XCR;
+	}
+	dev_info(&pdev->dev, "count_mode:%d\n", value);
+
+	/* Should we invert the qa input */
+	if (!of_property_read_u32(pdev->dev.of_node, "invert_qa", &value))
+		status = value ? status | QAP : status & ~QAP;
+	dev_info(&pdev->dev, "invert_qa:%d\n", value);
+
+	/* Should we invert the qb input */
+	if (!of_property_read_u32(pdev->dev.of_node, "invert_qb", &value))
+		status = value ? status | QBP : status & ~QBP;
+	dev_info(&pdev->dev, "invert_qb:%d\n", value);
+
+	/* Should we invert the index input */
+	if (!of_property_read_u32(pdev->dev.of_node, "invert_qi", &value))
+		status = value ? status | QIP : status & ~QIP;
+	dev_info(&pdev->dev, "invert_qi:%d\n", value);
+
+	/* Should we invert the strobe input */
+	if (!of_property_read_u32(pdev->dev.of_node, "invert_qs", &value))
+		status = value ? status | QSP : status & ~QSP;
+	dev_info(&pdev->dev, "invert_qs:%d\n", value);
+
+	/* Should we swap the cha and chb inputs */
+	if (!of_property_read_u32(pdev->dev.of_node, "swap_inputs", &value))
+		status = value ? status | SWAP : status & ~SWAP;
+	dev_info(&pdev->dev, "swap_inputs:%d\n", value);
+
+	dev_info(&pdev->dev, "QDECCTL:0x%04x\n", status);
+
+	/* Write the decoder control settings back to the control register */
+	writew(status, eqep->mmio_base + QDECCTL);
+
+	writel( 0, eqep->mmio_base + QPOSINIT);
+	writel(~0, eqep->mmio_base + QPOSMAX);
+	writel( 0, eqep->mmio_base + QPOSCNT);
+
+	dev_info(&pdev->dev, "QPOSINIT:0x%08x\n", readl(eqep->mmio_base + QPOSINIT));
+	dev_info(&pdev->dev, "QPOSMAX:0x%08x\n", readl(eqep->mmio_base + QPOSMAX));
+	dev_info(&pdev->dev, "QPOSCNT:0x%08x\n", readl(eqep->mmio_base + QPOSCNT));
+
+	status = UTOF;		/* Enable Unit Time Period interrupt. */
+	if (!of_property_read_u32(pdev->dev.of_node, "omit_interrupt", &value) && value) {
+		status = 0;	/* no interrupt */
+	}
+	writew(status, eqep->mmio_base + QEINT);
+	dev_info(&pdev->dev, "omit_interrupt:%d\n", value);
+	dev_info(&pdev->dev, "QEINT:0x%04x\n", status);
+
+	/* Calculate the timer ticks per second */
+	period = 1000000000;
+	period = period * eqep->clk_rate;
+	do_div(period, NSEC_PER_SEC);
+
+	/* Set this period into the unit timer period register */
+	writel(period, eqep->mmio_base + QUPRD);
+	dev_info(&pdev->dev, "QUPRD:0x%08x\n", (u32) period);
+
+	/*
+	 * Enable the eQEP with basic position counting turned on
+	 * PHEN - Quadrature position counter enable bit
+	 * UTE	- unit timer enable
+	 * QCLM - latch QPOSLAT to QPOSCNT upon unit timer overflow
+	 * IEL0 - Latch QPOSILAT on index signal.  Rising or falling, IEL[1:0] = 0 is reserved
+	 * SWI	- Software initialization of position count register, i.e. set QPOSCNT <= QPOSINIT,
+	 *  but this bit was not being reset by hardware as advertised in TRM,
+	 *  (so omit & clear QPOSCNT manually elsewhere?)
+	 */
+	status = PHEN | UTE | QCLM | IEL0 | SWI;
+	writew(status, eqep->mmio_base + QEPCTL);
+	dev_info(&pdev->dev, "QEPCTL:0x%04x write\n", status);
+	dev_info(&pdev->dev, "QEPCTL:0x%04x read\n", readw(eqep->mmio_base + QEPCTL));
+
+	/* We default to absolute mode */
+	eqep->op_mode = TIEQEP_MODE_ABSOLUTE;
+
+	/* Enable the power management runtime */
+	pm_runtime_enable(&pdev->dev);
+
+	/* Increment the device usage count and run pm_runtime_resume() */
+	pm_runtime_get_sync(&pdev->dev);
+
+	/* Initialize the notify work struture */
+	INIT_WORK(&eqep->notify_work, notify_handler);
+
+	/* Decrement the device usage count (twice) and run pm_runtime_idle() if zero */
+	pm_runtime_put_sync(&pdev->dev);
+
+	/* Set the platform driver data to the data object we've been creating for the eQEP unit */
+	platform_set_drvdata(pdev, eqep);
+
+	/* Success! */
+	dev_info(&pdev->dev, "irq:%d, clk_rate:%u\n", eqep->irq, eqep->clk_rate);
+	return 0;
+}
+
+/* Remove an instance of the eQEP driver */
+static int eqep_remove(struct platform_device *pdev)
+{
+	/* Get the eQEP driver data from the platform device structure */
+	struct eqep_chip *eqep = platform_get_drvdata(pdev);
+
+	/* Cancel work */
+	cancel_work_sync(&eqep->notify_work);
+
+	/* Unmap from sysfs */
+	sysfs_remove_group(&pdev->dev.kobj, &eqep_device_attr_group);
+
+	/* Release important assets */
+	free_irq(eqep->irq, pdev);
+
+	/* Increment the device usage count and run pm_runtime_resume() */
+	pm_runtime_get_sync(&pdev->dev);
+
+	/* Decrement the device usage count (twice) and run pm_runtime_idle() if zero */
+	pm_runtime_put_sync(&pdev->dev);
+	pm_runtime_put_sync(&pdev->dev);
+
+	/* Disable the runtime power management of this device */
+	pm_runtime_disable(&pdev->dev);
+
+	/* Return success */
+	return 0;
+}
+
+/* Power management suspend device */
+static int eqep_suspend(struct device *dev)
+{
+	/* Get the eqep driver information */
+	struct eqep_chip   *eqep = dev_get_drvdata(dev);
+	u16					tmp;
+
+	/* Shut down interrupts */
+	eqep->prior_qeint = readw(eqep->mmio_base + QEINT);
+	tmp = eqep->prior_qeint & ~UTOF;
+	writew(tmp, eqep->mmio_base + QEINT);
+
+	/* Get the existing state of QEPCTL */
+	eqep->prior_qepctl = readw(eqep->mmio_base + QEPCTL);
+
+	/* Disable eQEP controller */
+	writew(eqep->prior_qepctl & ~PHEN, eqep->mmio_base + QEPCTL);
+
+	/* Decrement the device usage count and run pm_runtime_idle() if zero */
+	pm_runtime_put_sync(dev);
+
+	/* Return success */
+	return 0;
+}
+
+/* Power management wake device back up */
+static int eqep_resume(struct device *dev)
+{
+	/* Get the eqep driver information */
+	struct eqep_chip *eqep = dev_get_drvdata(dev);
+
+	/* Restore interrupt enabled register */
+	writew(eqep->prior_qeint, eqep->mmio_base + QEINT);
+
+	/* Restore prior qep control register */
+	writew(eqep->prior_qepctl, eqep->mmio_base + QEPCTL);
+
+	/* Increment the device usage count and run pm_runtime_resume() */
+	pm_runtime_get_sync(dev);
+
+	/* Success */
+	return 0;
+}
+
+/* create pm functions object */
+static SIMPLE_DEV_PM_OPS(eqep_pm_ops, eqep_suspend, eqep_resume);
+
+/* Platform driver information */
+static struct platform_driver eqep_driver = {
+	.driver = {
+		.name	= "eqep",
+		.owner	= THIS_MODULE,
+		.pm	= &eqep_pm_ops,
+		.of_match_table = eqep_of_match,
+	},
+	.probe = eqep_probe,
+	.remove = eqep_remove,
+};
+
+/* Register this platform driver */
+module_platform_driver(eqep_driver);
+
+/* Module information */
+MODULE_DESCRIPTION("TI eQEP driver");
+MODULE_AUTHOR("Nathaniel R. Lewis");
+MODULE_LICENSE("GPL");
diff --git a/drivers/misc/tsl2550.c b/drivers/misc/tsl2550.c
index 87a1337..eb57610 100644
--- a/drivers/misc/tsl2550.c
+++ b/drivers/misc/tsl2550.c
@@ -177,7 +177,7 @@ static int tsl2550_calculate_lux(u8 ch0, u8 ch1)
 		} else
 			lux = 0;
 	else
-		return -EAGAIN;
+		return 0;
 
 	/* LUX range check */
 	return lux > TSL2550_MAX_LUX ? TSL2550_MAX_LUX : lux;
diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c
index 709a872..288cb38 100644
--- a/drivers/mmc/card/block.c
+++ b/drivers/mmc/card/block.c
@@ -36,6 +36,7 @@
 #include <linux/compat.h>
 #include <linux/pm_runtime.h>
 #include <linux/idr.h>
+#include <linux/of.h>
 
 #include <linux/mmc/ioctl.h>
 #include <linux/mmc/card.h>
@@ -2219,13 +2220,20 @@ static struct mmc_blk_data *mmc_blk_alloc_req(struct mmc_card *card,
 {
 	struct mmc_blk_data *md;
 	int devidx, ret;
+	int mindynidx = max(0, of_alias_get_highest_id("mmc") + 1);
+	int reqidx = card->host->index;
 
 again:
 	if (!ida_pre_get(&mmc_blk_ida, GFP_KERNEL))
 		return ERR_PTR(-ENOMEM);
 
 	spin_lock(&mmc_blk_lock);
-	ret = ida_get_new(&mmc_blk_ida, &devidx);
+	ret = ida_get_new_above(&mmc_blk_ida, reqidx, &devidx);
+	if (!ret && devidx < mindynidx && devidx != reqidx) {
+		// requested index in use, fall back to dynamic
+		ida_remove(&mmc_blk_ida, devidx);
+		ret = ida_get_new_above(&mmc_blk_ida, mindynidx, &devidx);
+	}
 	spin_unlock(&mmc_blk_lock);
 
 	if (ret == -EAGAIN)
diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c
index 848b345..2ae45ee 100644
--- a/drivers/mmc/core/host.c
+++ b/drivers/mmc/core/host.c
@@ -344,6 +344,8 @@ struct mmc_host *mmc_alloc_host(int extra, struct device *dev)
 {
 	int err;
 	struct mmc_host *host;
+	int mindynidx = max(0, of_alias_get_highest_id("mmc") + 1);
+	int reqidx = dev->of_node ? of_alias_get_id(dev->of_node, "mmc") : -1;
 
 	host = kzalloc(sizeof(struct mmc_host) + extra, GFP_KERNEL);
 	if (!host)
@@ -359,7 +361,16 @@ again:
 	}
 
 	spin_lock(&mmc_host_lock);
-	err = ida_get_new(&mmc_host_ida, &host->index);
+	if (reqidx >= 0) {
+		err = ida_get_new_above(&mmc_host_ida, reqidx, &host->index);
+		if (!err && host->index != reqidx) {
+			// requested index in use, fall back to dynamic
+			ida_remove(&mmc_host_ida, host->index);
+			reqidx = -1;
+		}
+	}
+	if (reqidx < 0)
+		err = ida_get_new_above(&mmc_host_ida, mindynidx, &host->index);
 	spin_unlock(&mmc_host_lock);
 
 	if (err == -EAGAIN) {
diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c
index 5f2f24a..c04d836 100644
--- a/drivers/mmc/host/omap_hsmmc.c
+++ b/drivers/mmc/host/omap_hsmmc.c
@@ -2098,9 +2098,9 @@ static int omap_hsmmc_probe(struct platform_device *pdev)
 		host->dbclk = NULL;
 	}
 
-	/* Since we do only SG emulation, we can have as many segs
-	 * as we want. */
-	mmc->max_segs = 1024;
+	/* Set this to a value that allows allocating an entire descriptor
+	 * list within a page (zero order allocation). */
+	mmc->max_segs = 64;
 
 	mmc->max_blk_size = 512;       /* Block Length at max can be 1024 */
 	mmc->max_blk_count = 0xFFFF;    /* No. of Blocks is 16 bits */
diff --git a/drivers/net/ethernet/allwinner/Kconfig b/drivers/net/ethernet/allwinner/Kconfig
index 47da7e7..060569c 100644
--- a/drivers/net/ethernet/allwinner/Kconfig
+++ b/drivers/net/ethernet/allwinner/Kconfig
@@ -33,4 +33,17 @@ config SUN4I_EMAC
           To compile this driver as a module, choose M here.  The module
           will be called sun4i-emac.
 
+config SUN8I_EMAC
+	tristate "Allwinner sun8i EMAC support"
+	depends on ARCH_SUNXI || COMPILE_TEST
+	depends on OF
+	select MII
+	select PHYLIB
+        ---help---
+	  This driver support the sun8i EMAC ethernet driver present on
+	  H3/A83T/A64 Allwinner SoCs.
+
+          To compile this driver as a module, choose M here.  The module
+          will be called sun8i-emac.
+
 endif # NET_VENDOR_ALLWINNER
diff --git a/drivers/net/ethernet/allwinner/Makefile b/drivers/net/ethernet/allwinner/Makefile
index 03129f7..8bd1693 100644
--- a/drivers/net/ethernet/allwinner/Makefile
+++ b/drivers/net/ethernet/allwinner/Makefile
@@ -3,3 +3,4 @@
 #
 
 obj-$(CONFIG_SUN4I_EMAC) += sun4i-emac.o
+obj-$(CONFIG_SUN8I_EMAC) += sun8i-emac.o
diff --git b/drivers/net/ethernet/allwinner/sun8i-emac.c b/drivers/net/ethernet/allwinner/sun8i-emac.c
new file mode 100644
index 0000000..fc0c1dd
--- /dev/null
+++ b/drivers/net/ethernet/allwinner/sun8i-emac.c
@@ -0,0 +1,2129 @@
+/*
+ * sun8i-emac driver
+ *
+ * Copyright (C) 2015-2016 Corentin LABBE <clabbe.montjoie@gmail.com>
+ *
+ * This is the driver for Allwinner Ethernet MAC found in H3/A83T/A64 SoC
+ *
+ * TODO:
+ * - MAC filtering
+ * - Jumbo frame
+ * - features rx-all (NETIF_F_RXALL_BIT)
+ */
+#include <linux/bitops.h>
+#include <linux/clk.h>
+#include <linux/dma-mapping.h>
+#include <linux/etherdevice.h>
+#include <linux/interrupt.h>
+#include <linux/iopoll.h>
+#include <linux/mii.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/of_device.h>
+#include <linux/of_mdio.h>
+#include <linux/of_net.h>
+#include <linux/phy.h>
+#include <linux/pinctrl/consumer.h>
+#include <linux/pinctrl/pinctrl.h>
+#include <linux/platform_device.h>
+#include <linux/reset.h>
+#include <linux/scatterlist.h>
+#include <linux/skbuff.h>
+
+#define SUN8I_EMAC_BASIC_CTL0	0x00
+#define SUN8I_EMAC_BASIC_CTL1	0x04
+#define SUN8I_EMAC_INT_STA	0x08
+#define SUN8I_EMAC_INT_EN	0x0C
+#define SUN8I_EMAC_TX_CTL0	0x10
+#define SUN8I_EMAC_TX_CTL1	0x14
+#define SUN8I_EMAC_TX_FLOW_CTL	0x1C
+#define SUN8I_EMAC_RX_CTL0	0x24
+#define SUN8I_EMAC_RX_CTL1	0x28
+#define SUN8I_EMAC_RX_FRM_FLT	0x38
+#define SUN8I_EMAC_MDIO_CMD	0x48
+#define SUN8I_EMAC_MDIO_DATA	0x4C
+#define SUN8I_EMAC_TX_DMA_STA	0xB0
+#define SUN8I_EMAC_TX_CUR_DESC	0xB4
+#define SUN8I_EMAC_TX_CUR_BUF	0xB8
+#define SUN8I_EMAC_RX_DMA_STA	0xC0
+
+#define MDIO_CMD_MII_BUSY	BIT(0)
+#define MDIO_CMD_MII_WRITE	BIT(1)
+#define MDIO_CMD_MII_PHY_REG_ADDR_MASK	GENMASK(8, 4)
+#define MDIO_CMD_MII_PHY_REG_ADDR_SHIFT	4
+#define MDIO_CMD_MII_PHY_ADDR_MASK	GENMASK(16, 12)
+#define MDIO_CMD_MII_PHY_ADDR_SHIFT	12
+
+#define SUN8I_EMAC_MACADDR_HI	0x50
+#define SUN8I_EMAC_MACADDR_LO	0x54
+
+#define SUN8I_EMAC_RX_DESC_LIST 0x34
+#define SUN8I_EMAC_TX_DESC_LIST 0x20
+
+#define SUN8I_EMAC_RX_DO_CRC BIT(27)
+#define SUN8I_EMAC_RX_STRIP_FCS BIT(28)
+
+#define SUN8I_COULD_BE_USED_BY_DMA BIT(31)
+
+/* Used in RX_CTL1*/
+#define RX_DMA_EN	BIT(30)
+#define RX_DMA_START	BIT(31)
+/* Used in TX_CTL1*/
+#define TX_DMA_EN	BIT(30)
+#define TX_DMA_START	BIT(31)
+
+/* Used in RX_CTL0 */
+#define RX_RECEIVER_EN		BIT(31)
+/* Used in TX_CTL0 */
+#define TX_TRANSMITTER_EN	BIT(31)
+
+/* Basic CTL0 */
+#define BCTL0_FD BIT(0)
+#define BCTL0_SPEED_10		2
+#define BCTL0_SPEED_100		3
+#define BCTL0_SPEED_MASK	GENMASK(3, 2)
+#define BCTL0_SPEED_SHIFT	2
+
+#define FLOW_RX 1
+#define FLOW_TX 2
+
+#define RX_INT                  BIT(8)
+#define TX_INT                  BIT(0)
+
+/* Bits used in frame RX status */
+#define DSC_RX_FIRST		BIT(9)
+#define DSC_RX_LAST		BIT(8)
+
+/* Bits used in frame TX ctl */
+#define SUN8I_EMAC_MAGIC_TX_BIT	BIT(24)
+#define SUN8I_EMAC_TX_DO_CRC	(BIT(27) | BIT(28))
+#define DSC_TX_FIRST		BIT(29)
+#define DSC_TX_LAST		BIT(30)
+#define SUN8I_EMAC_WANT_INT	BIT(31)
+
+enum emac_variant {
+	NONE_EMAC,/* for be sure that variant is non-0 if set */
+	A83T_EMAC,
+	H3_EMAC,
+	A64_EMAC,
+};
+
+static const char const estats_str[][ETH_GSTRING_LEN] = {
+	/* errors */
+	"rx_payload_error",
+	"rx_CRC_error",
+	"rx_phy_error",
+	"rx_length_error",
+	"rx_col_error",
+	"rx_header_error",
+	"rx_overflow_error",
+	"rx_saf_error",
+	"rx_daf_error",
+	"rx_buf_error",
+	/* misc infos */
+	"tx_stop_queue",
+	"rx_dma_ua",
+	"rx_dma_stop",
+	"tx_dma_ua",
+	"tx_dma_stop",
+	"rx_hw_csum",
+	"tx_hw_csum",
+	/* interrupts */
+	"rx_int",
+	"tx_int",
+	"rx_early_int",
+	"tx_early_int",
+	"tx_underflow_int",
+	/* debug */
+	"tx_used_desc",
+	"napi_schedule",
+	"napi_underflow",
+};
+
+struct sun8i_emac_stats {
+	u64 rx_payload_error;
+	u64 rx_crc_error;
+	u64 rx_phy_error;
+	u64 rx_length_error;
+	u64 rx_col_error;
+	u64 rx_header_error;
+	u64 rx_overflow_error;
+	u64 rx_saf_fail;
+	u64 rx_daf_fail;
+	u64 rx_buf_error;
+
+	u64 tx_stop_queue;
+	u64 rx_dma_ua;
+	u64 rx_dma_stop;
+	u64 tx_dma_ua;
+	u64 tx_dma_stop;
+	u64 rx_hw_csum;
+	u64 tx_hw_csum;
+
+	u64 rx_int;
+	u64 tx_int;
+	u64 rx_early_int;
+	u64 tx_early_int;
+	u64 tx_underflow_int;
+
+	u64 tx_used_desc;
+	u64 napi_schedule;
+	u64 napi_underflow;
+};
+
+/* The datasheet said that each descriptor can transfers up to 4096bytes
+ * But latter, a register documentation reduce that value to 2048
+ * Anyway using 2048 cause strange behaviours and even BSP driver use 2047
+ */
+#define DESC_BUF_MAX 2044
+#if (DESC_BUF_MAX < (ETH_FRAME_LEN + 4))
+#error "DESC_BUF_MAX must be set at minimum to ETH_FRAME_LEN + 4"
+#endif
+
+/* MAGIC value for knowing if a descriptor is available or not */
+#define DCLEAN (BIT(16) | BIT(14) | BIT(12) | BIT(10) | BIT(9))
+
+/* struct dma_desc - Structure of DMA descriptor used by the hardware
+ * @status: Status of the frame written by HW, so RO for the
+ *	driver (except for BIT(31) which is R/W)
+ * @ctl: Information on the frame written by the driver (INT, len,...)
+ * @buf_addr: physical address of the frame data
+ * @next: physical address of next dma_desc
+ */
+struct dma_desc {
+	u32 status;
+	u32 ctl;
+	u32 buf_addr;
+	u32 next;
+};
+
+/* Describe how data from skb are DMA mapped (used in txinfo map member) */
+#define MAP_SINGLE 1
+#define MAP_PAGE 2
+
+/* Structure for storing information about data in TX ring buffer */
+struct txinfo {
+	struct sk_buff *skb;
+	int map;
+};
+
+struct sun8i_emac_priv {
+	void __iomem *base;
+	void __iomem *syscon;
+	int irq;
+	struct device *dev;
+	struct net_device *ndev;
+	struct mii_bus *mdio;
+	struct napi_struct napi;
+	spinlock_t tx_lock;/* control the access of transmit descriptors */
+	int duplex;
+	int speed;
+	int link;
+	int phy_interface;
+	enum emac_variant variant;
+	struct device_node *phy_node;
+	struct clk *ahb_clk;
+	struct clk *ephy_clk;
+	bool use_internal_phy;
+
+	struct reset_control *rst;
+	struct reset_control *rst_ephy;
+
+	struct dma_desc *dd_rx;
+	dma_addr_t dd_rx_phy;
+	struct dma_desc *dd_tx;
+	dma_addr_t dd_tx_phy;
+	struct sk_buff **rx_skb;
+	struct txinfo *txl;
+
+	int nbdesc_tx;
+	int nbdesc_rx;
+	int tx_slot;
+	int tx_dirty;
+	int rx_dirty;
+	struct sun8i_emac_stats estats;
+	u32 msg_enable;
+	int flow_ctrl;
+	int pause;
+};
+
+static irqreturn_t sun8i_emac_dma_interrupt(int irq, void *dev_id);
+
+static void rb_inc(int *p, const int max)
+{
+	(*p)++;
+	(*p) %= max;
+}
+
+/* Return the number of contiguous free descriptors
+ * starting from tx_slot
+ */
+static int rb_tx_numfreedesc(struct net_device *ndev)
+{
+	struct sun8i_emac_priv *priv = netdev_priv(ndev);
+
+	if (priv->tx_slot < priv->tx_dirty)
+		return priv->tx_dirty - priv->tx_slot;
+
+	return (priv->nbdesc_tx - priv->tx_slot) + priv->tx_dirty;
+}
+
+/* Allocate a skb in a DMA descriptor
+ *
+ * @i index of slot to fill
+*/
+static int sun8i_emac_rx_skb(struct net_device *ndev, int i)
+{
+	struct sun8i_emac_priv *priv = netdev_priv(ndev);
+	struct dma_desc *ddesc;
+	struct sk_buff *skb;
+
+	ddesc = priv->dd_rx + i;
+
+	ddesc->ctl = 0;
+
+	skb = netdev_alloc_skb_ip_align(ndev, DESC_BUF_MAX);
+	if (!skb)
+		return -ENOMEM;
+
+	/* should not happen */
+	if (unlikely(priv->rx_skb[i]))
+		dev_warn(priv->dev, "BUG: Leaking a skbuff\n");
+
+	priv->rx_skb[i] = skb;
+
+	ddesc->buf_addr = dma_map_single(priv->dev, skb->data,
+					 DESC_BUF_MAX, DMA_FROM_DEVICE);
+	if (dma_mapping_error(priv->dev, ddesc->buf_addr)) {
+		dev_err(priv->dev, "ERROR: Cannot map RX buffer for DMA\n");
+		dev_kfree_skb(skb);
+		return -EFAULT;
+	}
+	ddesc->ctl |= DESC_BUF_MAX;
+	wmb();/* SUN8I_COULD_BE_USED_BY_DMA must be the last value written */
+	ddesc->status = SUN8I_COULD_BE_USED_BY_DMA;
+
+	return 0;
+}
+
+static void sun8i_emac_stop_tx(struct net_device *ndev)
+{
+	struct sun8i_emac_priv *priv = netdev_priv(ndev);
+	u32 v;
+
+	netif_stop_queue(ndev);
+
+	v = readl(priv->base + SUN8I_EMAC_TX_CTL0);
+	v &= ~TX_TRANSMITTER_EN;/*Disable transmitter after current reception*/
+	writel(v, priv->base + SUN8I_EMAC_TX_CTL0);
+	v = readl(priv->base + SUN8I_EMAC_TX_CTL1);
+	v &= ~TX_DMA_EN; /* Stop TX DMA */
+	writel(v, priv->base + SUN8I_EMAC_TX_CTL1);
+}
+
+static void sun8i_emac_stop_rx(struct net_device *ndev)
+{
+	struct sun8i_emac_priv *priv = netdev_priv(ndev);
+	u32 v;
+
+	v = readl(priv->base + SUN8I_EMAC_RX_CTL0);
+	v &= ~RX_RECEIVER_EN; /* Disable receiver after current reception */
+	writel(v, priv->base + SUN8I_EMAC_RX_CTL0);
+	v = readl(priv->base + SUN8I_EMAC_RX_CTL1);
+	v &= ~RX_DMA_EN; /* Stop RX DMA */
+	writel(v, priv->base + SUN8I_EMAC_RX_CTL1);
+}
+
+static void sun8i_emac_start_rx(struct net_device *ndev)
+{
+	struct sun8i_emac_priv *priv = netdev_priv(ndev);
+	u32 v;
+
+	v = readl(priv->base + SUN8I_EMAC_RX_CTL0);
+	v |= RX_RECEIVER_EN;/* Enable receiver */
+	writel(v, priv->base + SUN8I_EMAC_RX_CTL0);
+
+	v = readl(priv->base + SUN8I_EMAC_RX_CTL1);
+	v |= RX_DMA_START;
+	v |= RX_DMA_EN;
+	writel(v, priv->base + SUN8I_EMAC_RX_CTL1);
+}
+
+static void sun8i_emac_start_tx(struct net_device *ndev)
+{
+	struct sun8i_emac_priv *priv = netdev_priv(ndev);
+	u32 v;
+
+	v = readl(priv->base + SUN8I_EMAC_TX_CTL0);
+	v |= TX_TRANSMITTER_EN;
+	writel(v, priv->base + SUN8I_EMAC_TX_CTL0);
+
+	v = readl(priv->base + SUN8I_EMAC_TX_CTL1);
+	v |= TX_DMA_START;
+	v |= TX_DMA_EN;
+	writel(v, priv->base + SUN8I_EMAC_TX_CTL1);
+}
+
+/* Set MAC address for slot index
+ * @addr: the MAC address to set
+ * @index: The index of slot where to set address.
+ * The slot 0 is the main MACaddr
+ */
+static void sun8i_emac_set_macaddr(struct sun8i_emac_priv *priv,
+				   const u8 *addr, int index)
+{
+	u32 v;
+
+	dev_info(priv->dev, "device MAC address slot %d %02x:%02x:%02x:%02x:%02x:%02x\n",
+		 index, addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]);
+
+	v = (addr[5] << 8) | addr[4];
+	writel(v, priv->base + SUN8I_EMAC_MACADDR_HI + index * 8);
+	v = (addr[3] << 24) | (addr[2] << 16) | (addr[1] << 8) | addr[0];
+	writel(v, priv->base + SUN8I_EMAC_MACADDR_LO + index * 8);
+}
+
+static void sun8i_emac_set_link_mode(struct sun8i_emac_priv *priv)
+{
+	u32 v;
+
+	v = readl(priv->base + SUN8I_EMAC_BASIC_CTL0);
+
+	if (priv->duplex)
+		v |= BCTL0_FD;
+	else
+		v &= ~BCTL0_FD;
+
+	v &= ~BCTL0_SPEED_MASK;
+	switch (priv->speed) {
+	case 1000:
+		break;
+	case 100:
+		v |= BCTL0_SPEED_100 << BCTL0_SPEED_SHIFT;
+		break;
+	case 10:
+		v |= BCTL0_SPEED_10 << BCTL0_SPEED_SHIFT;
+		break;
+	}
+
+	writel(v, priv->base + SUN8I_EMAC_BASIC_CTL0);
+}
+
+static void sun8i_emac_flow_ctrl(struct sun8i_emac_priv *priv, int duplex,
+				 int fc)
+{
+	u32 flow = 0;
+
+	netif_dbg(priv, link, priv->ndev, "%s %d %d\n", __func__,
+		  duplex, fc);
+
+	flow = readl(priv->base + SUN8I_EMAC_RX_CTL0);
+	if (fc & FLOW_RX)
+		flow |= BIT(16);
+	else
+		flow &= ~BIT(16);
+	writel(flow, priv->base + SUN8I_EMAC_RX_CTL0);
+
+	flow = readl(priv->base + SUN8I_EMAC_TX_FLOW_CTL);
+	if (fc & FLOW_TX)
+		flow |= BIT(0);
+	else
+		flow &= ~BIT(0);
+	writel(flow, priv->base + SUN8I_EMAC_TX_FLOW_CTL);
+}
+
+/* Grab a frame into a skb from descriptor number i */
+static int sun8i_emac_rx_from_ddesc(struct net_device *ndev, int i)
+{
+	struct sk_buff *skb;
+	struct sun8i_emac_priv *priv = netdev_priv(ndev);
+	struct dma_desc *ddesc = priv->dd_rx + i;
+	int frame_len;
+	int rxcsum_done = 0;
+
+	if (ndev->features & NETIF_F_RXCSUM)
+		rxcsum_done = 1;
+
+	/* bit0/bit7 work only on IPv4/IPv6 TCP traffic,
+	 * (not on ARP for example) so we dont raise rx_errors/discard frame
+	 */
+	/* the checksum or length of received frame's payload is wrong*/
+	if (ddesc->status & BIT(0)) {
+		priv->estats.rx_payload_error++;
+		rxcsum_done = 0;
+	}
+	/* RX_CRC_ERR */
+	if (ddesc->status & BIT(1)) {
+		priv->ndev->stats.rx_errors++;
+		priv->ndev->stats.rx_crc_errors++;
+		priv->estats.rx_crc_error++;
+		goto discard_frame;
+	}
+	/* RX_PHY_ERR */
+	if ((ddesc->status & BIT(3))) {
+		priv->ndev->stats.rx_errors++;
+		priv->estats.rx_phy_error++;
+		goto discard_frame;
+	}
+	/* RX_LENGTH_ERR */
+	if ((ddesc->status & BIT(4))) {
+		priv->ndev->stats.rx_errors++;
+		priv->ndev->stats.rx_length_errors++;
+		priv->estats.rx_length_error++;
+		goto discard_frame;
+	}
+	/* RX_COL_ERR */
+	if ((ddesc->status & BIT(6))) {
+		priv->ndev->stats.rx_errors++;
+		priv->estats.rx_col_error++;
+		goto discard_frame;
+	}
+	/* RX_HEADER_ERR */
+	if ((ddesc->status & BIT(7))) {
+		priv->estats.rx_header_error++;
+		rxcsum_done = 0;
+	}
+	/* RX_OVERFLOW_ERR */
+	if ((ddesc->status & BIT(11))) {
+		priv->ndev->stats.rx_over_errors++;
+		priv->estats.rx_overflow_error++;
+		goto discard_frame;
+	}
+	/* RX_NO_ENOUGTH_BUF_ERR */
+	if ((ddesc->status & BIT(14))) {
+		priv->ndev->stats.rx_errors++;
+		priv->estats.rx_buf_error++;
+		goto discard_frame;
+	}
+
+	/* BIT(9) is for the first frame, not having it is bad since we do not
+	 * handle Jumbo frame
+	 */
+	if ((ddesc->status & DSC_RX_FIRST) == 0) {
+		dev_warn_ratelimited(priv->dev, "BUG: Non-first frame received. This should not happen\n");
+		goto discard_frame;
+	}
+	frame_len = (ddesc->status >> 16) & 0x3FFF;
+	if (!(ndev->features & NETIF_F_RXFCS))
+		frame_len -= ETH_FCS_LEN;
+
+	skb = priv->rx_skb[i];
+
+	netif_dbg(priv, rx_status, priv->ndev,
+		  "%s from %02d %pad len=%d status=%x st=%x\n",
+		  __func__, i, &ddesc, frame_len, ddesc->status, ddesc->ctl);
+
+	skb_put(skb, frame_len);
+
+	dma_unmap_single(priv->dev, ddesc->buf_addr, DESC_BUF_MAX,
+			 DMA_FROM_DEVICE);
+	skb->protocol = eth_type_trans(skb, priv->ndev);
+	if (rxcsum_done) {
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+		priv->estats.rx_hw_csum++;
+	} else {
+		skb->ip_summed = CHECKSUM_PARTIAL;
+	}
+
+	priv->ndev->stats.rx_packets++;
+	priv->ndev->stats.rx_bytes += frame_len;
+	priv->rx_skb[i] = NULL;
+
+	/* this frame is not the last */
+	if ((ddesc->status & DSC_RX_LAST) == 0) {
+		dev_warn(priv->dev, "Multi frame not implemented currlen=%d\n",
+			 frame_len);
+	}
+
+	sun8i_emac_rx_skb(ndev, i);
+	napi_gro_receive(&priv->napi, skb);
+
+	return 0;
+	/* If the frame need to be dropped, we simply reuse the buffer */
+discard_frame:
+	ddesc->ctl = DESC_BUF_MAX;
+	wmb();/* SUN8I_COULD_BE_USED_BY_DMA must be the last value written */
+	ddesc->status = SUN8I_COULD_BE_USED_BY_DMA;
+	return 0;
+}
+
+/* iterate over dma_desc for finding completed xmit.
+ * Called from interrupt context, so no need to spinlock tx
+ *
+ * The problem is: how to know that a descriptor is sent and not just in
+ * preparation.
+ * Need to have status=0 and st set but this is the state of first frame just
+ * before setting the own-by-DMA bit.
+ * The solution is to used the artificial value DCLEAN.
+ */
+static int sun8i_emac_complete_xmit(struct net_device *ndev, int budget)
+{
+	struct sun8i_emac_priv *priv = netdev_priv(ndev);
+	struct dma_desc *ddesc;
+	int frame_len;
+	int work = 0;
+
+	spin_lock(&priv->tx_lock);
+	do {
+		ddesc = priv->dd_tx + priv->tx_dirty;
+
+		if (ddesc->status & SUN8I_COULD_BE_USED_BY_DMA)
+			goto xmit_end;
+
+		if (ddesc->status == DCLEAN)
+			goto xmit_end;
+
+		if (ddesc->status == 0 && !ddesc->ctl) {
+			dev_err(priv->dev, "BUG: reached the void %d %d\n",
+				priv->tx_dirty, priv->tx_slot);
+			goto xmit_end;
+		}
+
+		/* TX_UNDERFLOW_ERR */
+		if (ddesc->status & BIT(1))
+			priv->ndev->stats.tx_errors++;
+		/* TX_DEFER_ERR */
+		if (ddesc->status & BIT(2))
+			priv->ndev->stats.tx_errors++;
+		/* BIT 6:3 numbers of collisions */
+		if (ddesc->status & 0x78)
+			priv->ndev->stats.collisions +=
+				(ddesc->status & 0x78) >> 3;
+		/* TX_COL_ERR_1 */
+		if (ddesc->status & BIT(8))
+			priv->ndev->stats.tx_errors++;
+		/* TX_COL_ERR_0 */
+		if (ddesc->status & BIT(9))
+			priv->ndev->stats.tx_errors++;
+		/* TX_CRS_ERR */
+		if (ddesc->status & BIT(10))
+			priv->ndev->stats.tx_carrier_errors++;
+		/* TX_PAYLOAD_ERR */
+		if (ddesc->status & BIT(12))
+			priv->ndev->stats.tx_errors++;
+		/* TX_LENGTH_ERR */
+		if (ddesc->status & BIT(14))
+			priv->ndev->stats.tx_errors++;
+		/* TX_HEADER_ERR */
+		if (ddesc->status & BIT(16))
+			priv->ndev->stats.tx_errors++;
+		frame_len = ddesc->ctl & 0x3FFF;
+		if (priv->txl[priv->tx_dirty].map == MAP_SINGLE)
+			dma_unmap_single(priv->dev, ddesc->buf_addr,
+					 frame_len, DMA_TO_DEVICE);
+		else
+			dma_unmap_page(priv->dev, ddesc->buf_addr,
+				       frame_len, DMA_TO_DEVICE);
+		/* we can free skb only on last frame */
+		if (priv->txl[priv->tx_dirty].skb && (ddesc->ctl & DSC_TX_LAST))
+			dev_kfree_skb_irq(priv->txl[priv->tx_dirty].skb);
+
+		priv->txl[priv->tx_dirty].skb = NULL;
+		priv->txl[priv->tx_dirty].map = 0;
+		ddesc->ctl = 0;
+		wmb(); /* setting to DCLEAN is the last value to be set */
+		ddesc->status = DCLEAN;
+		work++;
+
+		rb_inc(&priv->tx_dirty, priv->nbdesc_tx);
+		ddesc = priv->dd_tx + priv->tx_dirty;
+	} while (ddesc->ctl && !(ddesc->status & SUN8I_COULD_BE_USED_BY_DMA));
+
+	if (netif_queue_stopped(ndev) &&
+	    rb_tx_numfreedesc(ndev) > MAX_SKB_FRAGS + 1)
+		netif_wake_queue(ndev);
+xmit_end:
+	spin_unlock(&priv->tx_lock);
+	return work;
+}
+
+static int sun8i_emac_poll(struct napi_struct *napi, int budget)
+{
+	struct sun8i_emac_priv *priv =
+		container_of(napi, struct sun8i_emac_priv, napi);
+	struct net_device *ndev = priv->ndev;
+	int worked;
+	struct dma_desc *ddesc;
+
+	priv->estats.napi_schedule++;
+	worked = sun8i_emac_complete_xmit(ndev, budget);
+
+	ddesc = priv->dd_rx + priv->rx_dirty;
+	while (!(ddesc->status & SUN8I_COULD_BE_USED_BY_DMA) &&
+	       worked < budget) {
+		sun8i_emac_rx_from_ddesc(ndev, priv->rx_dirty);
+		worked++;
+		rb_inc(&priv->rx_dirty, priv->nbdesc_rx);
+		ddesc = priv->dd_rx + priv->rx_dirty;
+	};
+	if (worked < budget) {
+		priv->estats.napi_underflow++;
+		napi_complete(&priv->napi);
+		writel(RX_INT | TX_INT, priv->base + SUN8I_EMAC_INT_EN);
+	}
+	return worked;
+}
+
+static int sun8i_mdio_read(struct mii_bus *bus, int phy_addr, int phy_reg)
+{
+	struct net_device *ndev = bus->priv;
+	struct sun8i_emac_priv *priv = netdev_priv(ndev);
+	int err;
+	u32 reg;
+
+	err = readl_poll_timeout(priv->base + SUN8I_EMAC_MDIO_CMD, reg,
+				 !(reg & MDIO_CMD_MII_BUSY), 100, 10000);
+	if (err) {
+		dev_err(priv->dev, "%s timeout %x\n", __func__, reg);
+		return err;
+	}
+
+	reg &= ~MDIO_CMD_MII_WRITE;
+	reg &= ~MDIO_CMD_MII_PHY_REG_ADDR_MASK;
+	reg |= (phy_reg << MDIO_CMD_MII_PHY_REG_ADDR_SHIFT) &
+		MDIO_CMD_MII_PHY_REG_ADDR_MASK;
+
+	reg &= ~MDIO_CMD_MII_PHY_ADDR_MASK;
+
+	reg |= (phy_addr << MDIO_CMD_MII_PHY_ADDR_SHIFT) &
+		MDIO_CMD_MII_PHY_ADDR_MASK;
+
+	reg |= MDIO_CMD_MII_BUSY;
+
+	writel(reg, priv->base + SUN8I_EMAC_MDIO_CMD);
+
+	err = readl_poll_timeout(priv->base + SUN8I_EMAC_MDIO_CMD, reg,
+				 !(reg & MDIO_CMD_MII_BUSY), 100, 10000);
+
+	if (err) {
+		dev_err(priv->dev, "%s timeout %x\n", __func__, reg);
+		return err;
+	}
+
+	return readl(priv->base + SUN8I_EMAC_MDIO_DATA);
+}
+
+static int sun8i_mdio_write(struct mii_bus *bus, int phy_addr, int phy_reg,
+			    u16 data)
+{
+	struct net_device *ndev = bus->priv;
+	struct sun8i_emac_priv *priv = netdev_priv(ndev);
+	u32 reg;
+	int err;
+
+	err = readl_poll_timeout(priv->base + SUN8I_EMAC_MDIO_CMD, reg,
+				 !(reg & MDIO_CMD_MII_BUSY), 100, 10000);
+	if (err) {
+		dev_err(priv->dev, "%s timeout %x\n", __func__, reg);
+		return err;
+	}
+
+	reg &= ~MDIO_CMD_MII_PHY_REG_ADDR_MASK;
+	reg |= (phy_reg << MDIO_CMD_MII_PHY_REG_ADDR_SHIFT) &
+		MDIO_CMD_MII_PHY_REG_ADDR_MASK;
+
+	reg &= ~MDIO_CMD_MII_PHY_ADDR_MASK;
+	reg |= (phy_addr << MDIO_CMD_MII_PHY_ADDR_SHIFT) &
+		MDIO_CMD_MII_PHY_ADDR_MASK;
+
+	reg |= MDIO_CMD_MII_WRITE;
+	reg |= MDIO_CMD_MII_BUSY;
+
+	writel(reg, priv->base + SUN8I_EMAC_MDIO_CMD);
+	writel(data, priv->base + SUN8I_EMAC_MDIO_DATA);
+	dev_dbg(priv->dev, "%s %d %d %x %x\n", __func__, phy_addr, phy_reg,
+		reg, data);
+
+	err = readl_poll_timeout(priv->base + SUN8I_EMAC_MDIO_CMD, reg,
+				 !(reg & MDIO_CMD_MII_BUSY), 100, 10000);
+	if (err) {
+		dev_err(priv->dev, "%s timeout %x\n", __func__, reg);
+		return err;
+	}
+
+	return 0;
+}
+
+static int sun8i_emac_mdio_register(struct net_device *ndev)
+{
+	struct sun8i_emac_priv *priv = netdev_priv(ndev);
+	struct mii_bus *bus;
+	int ret;
+
+	bus = mdiobus_alloc();
+	if (!bus) {
+		netdev_err(ndev, "Failed to allocate a new mdio bus\n");
+		return -ENOMEM;
+	}
+
+	bus->name = dev_name(priv->dev);
+	bus->read = &sun8i_mdio_read;
+	bus->write = &sun8i_mdio_write;
+	snprintf(bus->id, MII_BUS_ID_SIZE, "%s-%x", bus->name, priv->dev->id);
+
+	bus->parent = priv->dev;
+	bus->priv = ndev;
+
+	ret = of_mdiobus_register(bus, priv->dev->of_node);
+	if (ret) {
+		netdev_err(ndev, "Could not register a MDIO bus: %d\n", ret);
+		mdiobus_free(bus);
+		return ret;
+	}
+
+	priv->mdio = bus;
+
+	return 0;
+}
+
+static void sun8i_emac_mdio_unregister(struct net_device *ndev)
+{
+	struct sun8i_emac_priv *priv = netdev_priv(ndev);
+
+	mdiobus_unregister(priv->mdio);
+	mdiobus_free(priv->mdio);
+}
+
+/* Run within phydev->lock */
+static void sun8i_emac_adjust_link(struct net_device *ndev)
+{
+	struct sun8i_emac_priv *priv = netdev_priv(ndev);
+	struct phy_device *phydev = ndev->phydev;
+	int new_state = 0;
+
+	netif_dbg(priv, link, priv->ndev,
+		  "%s link=%x duplex=%x speed=%x\n", __func__,
+		  phydev->link, phydev->duplex, phydev->speed);
+	if (!phydev)
+		return;
+
+	if (phydev->link) {
+		if (phydev->duplex != priv->duplex) {
+			new_state = 1;
+			priv->duplex = phydev->duplex;
+		}
+		if (phydev->pause)
+			sun8i_emac_flow_ctrl(priv, phydev->duplex,
+					     priv->flow_ctrl);
+
+		if (phydev->speed != priv->speed) {
+			new_state = 1;
+			priv->speed = phydev->speed;
+		}
+
+		if (priv->link == 0) {
+			new_state = 1;
+			priv->link = phydev->link;
+		}
+
+		netif_dbg(priv, link, priv->ndev,
+			  "%s new=%d link=%d pause=%d\n",
+			  __func__, new_state, priv->link, phydev->pause);
+		if (new_state)
+			sun8i_emac_set_link_mode(priv);
+	} else if (priv->link != phydev->link) {
+		new_state = 1;
+		priv->link = 0;
+		priv->speed = 0;
+		priv->duplex = -1;
+	}
+
+	if (new_state)
+		phy_print_status(phydev);
+}
+
+/* H3 specific bits for EPHY */
+#define H3_EPHY_ADDR_SHIFT	20
+#define H3_EPHY_LED_POL		BIT(17) /* 1: active low, 0: active high */
+#define H3_EPHY_SHUTDOWN	BIT(16) /* 1: shutdown, 0: power up */
+#define H3_EPHY_SELECT		BIT(15) /* 1: internal PHY, 0: external PHY */
+#define H3_EPHY_DEFAULT_VALUE	0x58000
+#define H3_EPHY_DEFAULT_MASK	GENMASK(31, 15)
+
+/* H3/A64 specific bits */
+#define SC_RMII_EN		BIT(13) /* 1: enable RMII (overrides EPIT) */
+
+/* Generic system control EMAC_CLK bits */
+#define SC_ETXDC_MASK		GENMASK(2, 0)
+#define SC_ETXDC_SHIFT		10
+#define SC_ERXDC_MASK		GENMASK(4, 0)
+#define SC_ERXDC_SHIFT		5
+#define SC_EPIT			BIT(2) /* 1: RGMII, 0: MII */
+#define SC_ETCS_MASK		GENMASK(1, 0)
+#define SC_ETCS_MII		0x0
+#define SC_ETCS_EXT_GMII	0x1
+#define SC_ETCS_INT_GMII	0x2
+
+static int sun8i_emac_set_syscon_ephy(struct net_device *ndev, u32 *reg)
+{
+	struct sun8i_emac_priv *priv = netdev_priv(ndev);
+	struct device_node *node = priv->dev->of_node;
+	int ret;
+
+	*reg &= ~H3_EPHY_DEFAULT_MASK;
+	*reg |= H3_EPHY_DEFAULT_VALUE;
+
+	if (!priv->use_internal_phy) {
+		/* switch to external PHY interface */
+		*reg &= ~H3_EPHY_SELECT;
+		return 0;
+	}
+
+	if (priv->phy_interface != PHY_INTERFACE_MODE_MII) {
+		netdev_warn(ndev,
+			    "Internal PHY requested, forcing MII mode.\n");
+		priv->phy_interface = PHY_INTERFACE_MODE_MII;
+	}
+
+	*reg |= H3_EPHY_SELECT;
+	*reg &= ~H3_EPHY_SHUTDOWN;
+
+	if (of_property_read_bool(node, "allwinner,leds-active-low"))
+		*reg |= H3_EPHY_LED_POL;
+
+	ret = of_mdio_parse_addr(priv->dev, priv->phy_node);
+	if (ret < 0) {
+		netdev_err(ndev, "Could not parse MDIO addr\n");
+		return ret;
+	}
+
+	/* of_mdio_parse_addr returns a valid (0 ~ 31) PHY
+	 * address. No need to mask it again.
+	 */
+	*reg |= ret << H3_EPHY_ADDR_SHIFT;
+
+	return 0;
+}
+
+static int sun8i_emac_set_syscon(struct net_device *ndev)
+{
+	struct sun8i_emac_priv *priv = netdev_priv(ndev);
+	struct device_node *node = priv->dev->of_node;
+	int ret;
+	u32 reg, val;
+
+	reg = readl(priv->syscon);
+
+	if (priv->variant == H3_EMAC) {
+		ret = sun8i_emac_set_syscon_ephy(ndev, &reg);
+		if (ret)
+			return ret;
+	}
+
+	if (!of_property_read_u32(node, "allwinner,tx-delay", &val)) {
+		if (val <= SC_ETXDC_MASK) {
+			reg &= ~(SC_ETXDC_MASK << SC_ETXDC_SHIFT);
+			reg |= (val << SC_ETXDC_SHIFT);
+		} else {
+			netdev_warn(ndev, "Invalid TX clock delay: %d\n", val);
+		}
+	}
+
+	if (!of_property_read_u32(node, "allwinner,rx-delay", &val)) {
+		if (val <= SC_ERXDC_MASK) {
+			reg &= ~(SC_ERXDC_MASK << SC_ERXDC_SHIFT);
+			reg |= (val << SC_ERXDC_SHIFT);
+		} else {
+			netdev_warn(ndev, "Invalid RX clock delay: %d\n", val);
+		}
+	}
+
+	/* Clear interface mode bits */
+	reg &= ~(SC_ETCS_MASK | SC_EPIT);
+	if (priv->variant == H3_EMAC || priv->variant == A64_EMAC)
+		reg &= ~SC_RMII_EN;
+
+	switch (priv->phy_interface) {
+	case PHY_INTERFACE_MODE_MII:
+		/* default */
+		break;
+	case PHY_INTERFACE_MODE_RGMII:
+		reg |= SC_EPIT | SC_ETCS_INT_GMII;
+		break;
+	case PHY_INTERFACE_MODE_RMII:
+		if (priv->variant == H3_EMAC || priv->variant == A64_EMAC) {
+			reg |= SC_RMII_EN | SC_ETCS_EXT_GMII;
+			break;
+		}
+		/* RMII not supported on A83T */
+	default:
+		netdev_err(ndev, "Unsupported interface mode: %s",
+			   phy_modes(priv->phy_interface));
+		return -EINVAL;
+	}
+
+	writel(reg, priv->syscon);
+
+	return 0;
+}
+
+static void sun8i_emac_unset_syscon(struct net_device *ndev)
+{
+	struct sun8i_emac_priv *priv = netdev_priv(ndev);
+	u32 reg = 0;
+
+	if (priv->variant == H3_EMAC)
+		reg = H3_EPHY_DEFAULT_VALUE;
+
+	writel(reg, priv->syscon);
+}
+
+/* Set Management Data Clock, must be call after device reset */
+static void sun8i_emac_set_mdc(struct net_device *ndev)
+{
+	struct sun8i_emac_priv *priv = netdev_priv(ndev);
+	unsigned long rate;
+	u32 reg;
+
+	rate = clk_get_rate(priv->ahb_clk);
+	if (rate > 160000000)
+		reg = 0x3 << 20; /* AHB / 128 */
+	else if (rate > 80000000)
+		reg = 0x2 << 20; /* AHB / 64 */
+	else if (rate > 40000000)
+		reg = 0x1 << 20; /* AHB / 32 */
+	else
+		reg = 0x0 << 20; /* AHB / 16 */
+	netif_dbg(priv, link, ndev, "MDC auto : %x\n", reg);
+	writel(reg, priv->base + SUN8I_EMAC_MDIO_CMD);
+}
+
+/* "power" the device, by enabling clk/reset/regulators */
+static int sun8i_emac_power(struct net_device *ndev)
+{
+	struct sun8i_emac_priv *priv = netdev_priv(ndev);
+	int ret;
+
+	ret = clk_prepare_enable(priv->ahb_clk);
+	if (ret) {
+		netdev_err(ndev, "Could not enable AHB clock\n");
+		return ret;
+	}
+
+	if (priv->rst) {
+		ret = reset_control_deassert(priv->rst);
+		if (ret) {
+			netdev_err(ndev, "Could not deassert reset\n");
+			goto err_reset;
+		}
+	}
+
+	if (priv->ephy_clk) {
+		ret = clk_prepare_enable(priv->ephy_clk);
+		if (ret) {
+			netdev_err(ndev, "Could not enable EPHY clock\n");
+			goto err_ephy_clk;
+		}
+	}
+
+	if (priv->rst_ephy) {
+		ret = reset_control_deassert(priv->rst_ephy);
+		if (ret) {
+			netdev_err(ndev, "Could not deassert EPHY reset\n");
+			goto err_ephy_reset;
+		}
+	}
+
+	return 0;
+
+err_ephy_reset:
+	if (priv->ephy_clk)
+		clk_disable_unprepare(priv->ephy_clk);
+err_ephy_clk:
+	if (priv->rst)
+		reset_control_assert(priv->rst);
+err_reset:
+	clk_disable_unprepare(priv->ahb_clk);
+	return ret;
+}
+
+/* "Unpower" the device, disabling clocks and regulators, asserting reset */
+static void sun8i_emac_unpower(struct net_device *ndev)
+{
+	struct sun8i_emac_priv *priv = netdev_priv(ndev);
+
+	if (priv->rst_ephy)
+		reset_control_assert(priv->rst_ephy);
+
+	if (priv->ephy_clk)
+		clk_disable_unprepare(priv->ephy_clk);
+
+	if (priv->rst)
+		reset_control_assert(priv->rst);
+
+	clk_disable_unprepare(priv->ahb_clk);
+}
+
+static int sun8i_emac_init(struct net_device *ndev)
+{
+	struct sun8i_emac_priv *priv = netdev_priv(ndev);
+	struct device_node *node = priv->dev->of_node;
+	const u8 *addr;
+
+	/* Try to get MAC address from DT, or assign a random one */
+	addr = of_get_mac_address(node);
+	if (addr)
+		ether_addr_copy(ndev->dev_addr, addr);
+	else
+		eth_hw_addr_random(ndev);
+
+	priv->phy_interface = of_get_phy_mode(node);
+	if (priv->phy_interface < 0) {
+		netdev_err(ndev, "PHY interface mode node unspecified\n");
+		return priv->phy_interface;
+	}
+
+	return sun8i_emac_power(ndev);
+}
+
+static void sun8i_emac_uninit(struct net_device *ndev)
+{
+	struct sun8i_emac_priv *priv = netdev_priv(ndev);
+
+	mdiobus_unregister(priv->mdio);
+
+	sun8i_emac_unpower(ndev);
+}
+
+static int sun8i_emac_mdio_probe(struct net_device *ndev)
+{
+	struct sun8i_emac_priv *priv = netdev_priv(ndev);
+	struct phy_device *phydev = NULL;
+
+	phydev = of_phy_connect(ndev, priv->phy_node, &sun8i_emac_adjust_link,
+				0, priv->phy_interface);
+
+	if (!phydev) {
+		netdev_err(ndev, "Could not attach to PHY\n");
+		return -ENODEV;
+	}
+
+	phy_attached_info(phydev);
+
+	/* mask with MAC supported features */
+	phydev->supported &= PHY_GBIT_FEATURES;
+	phydev->advertising = phydev->supported;
+
+	priv->link = 0;
+	priv->speed = 0;
+	priv->duplex = -1;
+
+	return 0;
+}
+
+/* Allocate both RX and TX ring buffer and init them
+ * This function also write the startbase of thoses ring in the device.
+ * All structures that help managing thoses rings are also handled
+ * by this functions (rx_skb/txl)
+ */
+static int sun8i_emac_alloc_rings(struct net_device *ndev)
+{
+	struct sun8i_emac_priv *priv = netdev_priv(ndev);
+	struct dma_desc *ddesc;
+	int err, i;
+
+	priv->rx_skb = kcalloc(priv->nbdesc_rx, sizeof(struct sk_buff *),
+			      GFP_KERNEL);
+	if (!priv->rx_skb) {
+		err = -ENOMEM;
+		goto rx_skb_error;
+	}
+	priv->txl = kcalloc(priv->nbdesc_tx, sizeof(struct txinfo), GFP_KERNEL);
+	if (!priv->txl) {
+		err = -ENOMEM;
+		goto tx_error;
+	}
+
+	/* allocate/init RX ring */
+	priv->dd_rx = dma_zalloc_coherent(priv->dev,
+			priv->nbdesc_rx * sizeof(struct dma_desc),
+			&priv->dd_rx_phy, GFP_KERNEL);
+	if (!priv->dd_rx) {
+		dev_err(priv->dev, "ERROR: cannot allocate DMA RX buffer");
+		err = -ENOMEM;
+		goto dma_rx_error;
+	}
+	ddesc = priv->dd_rx;
+	for (i = 0; i < priv->nbdesc_rx; i++) {
+		sun8i_emac_rx_skb(ndev, i);
+		ddesc->next = (u32)priv->dd_rx_phy + (i + 1)
+			* sizeof(struct dma_desc);
+		ddesc++;
+	}
+	/* last descriptor point back to first one */
+	ddesc--;
+	ddesc->next = (u32)priv->dd_rx_phy;
+
+	/* allocate/init TX ring */
+	priv->dd_tx = dma_zalloc_coherent(priv->dev,
+			priv->nbdesc_tx * sizeof(struct dma_desc),
+			&priv->dd_tx_phy, GFP_KERNEL);
+	if (!priv->dd_tx) {
+		dev_err(priv->dev, "ERROR: cannot allocate DMA TX buffer");
+		err = -ENOMEM;
+		goto dma_tx_error;
+	}
+	ddesc = priv->dd_tx;
+	for (i = 0; i < priv->nbdesc_tx; i++) {
+		ddesc->status = DCLEAN;
+		ddesc->ctl = 0;
+		ddesc->next = (u32)(priv->dd_tx_phy + (i + 1)
+			* sizeof(struct dma_desc));
+		ddesc++;
+	}
+	/* last descriptor point back to first one */
+	ddesc--;
+	ddesc->next = (u32)priv->dd_tx_phy;
+	i--;
+
+	priv->tx_slot = 0;
+	priv->tx_dirty = 0;
+	priv->rx_dirty = 0;
+
+	/* write start of RX ring descriptor */
+	writel(priv->dd_rx_phy, priv->base + SUN8I_EMAC_RX_DESC_LIST);
+	/* write start of TX ring descriptor */
+	writel(priv->dd_tx_phy, priv->base + SUN8I_EMAC_TX_DESC_LIST);
+
+	return 0;
+dma_tx_error:
+	dma_free_coherent(priv->dev, priv->nbdesc_rx * sizeof(struct dma_desc),
+			  priv->dd_rx, priv->dd_rx_phy);
+dma_rx_error:
+	kfree(priv->txl);
+tx_error:
+	kfree(priv->rx_skb);
+rx_skb_error:
+	return err;
+}
+
+static int sun8i_emac_open(struct net_device *ndev)
+{
+	struct sun8i_emac_priv *priv = netdev_priv(ndev);
+	int err;
+	u32 v;
+
+	err = request_irq(priv->irq, sun8i_emac_dma_interrupt, 0,
+			  dev_name(priv->dev), ndev);
+	if (err) {
+		dev_err(priv->dev, "Cannot request IRQ: %d\n", err);
+		return err;
+	}
+
+	/* Set interface mode (and configure internal PHY on H3) */
+	err = sun8i_emac_set_syscon(ndev);
+	if (err)
+		goto err_irq;
+
+	/* Do SOFT RST */
+	v = readl(priv->base + SUN8I_EMAC_BASIC_CTL1);
+	writel(v | 0x01, priv->base + SUN8I_EMAC_BASIC_CTL1);
+
+	err = readl_poll_timeout(priv->base + SUN8I_EMAC_BASIC_CTL1, v,
+				 !(v & 0x01), 100, 10000);
+	if (err) {
+		dev_err(priv->dev, "EMAC reset timeout\n");
+		err = -EFAULT;
+		goto err_syscon;
+	}
+
+	sun8i_emac_set_mdc(ndev);
+
+	err = sun8i_emac_mdio_register(ndev);
+	if (err)
+		goto err_syscon;
+
+	err = sun8i_emac_mdio_probe(ndev);
+	if (err)
+		goto err_syscon;
+
+	/* DMA */
+	v = (8 << 24);/* burst len */
+	writel(v, priv->base + SUN8I_EMAC_BASIC_CTL1);
+
+	writel(RX_INT | TX_INT, priv->base + SUN8I_EMAC_INT_EN);
+
+	v = readl(priv->base + SUN8I_EMAC_RX_CTL0);
+	/* CHECK_CRC */
+	if (ndev->features & NETIF_F_RXCSUM)
+		v |= SUN8I_EMAC_RX_DO_CRC;
+	else
+		v &= ~SUN8I_EMAC_RX_DO_CRC;
+	/* STRIP_FCS */
+	if (ndev->features & NETIF_F_RXFCS)
+		v &= ~SUN8I_EMAC_RX_STRIP_FCS;
+	else
+		v |= SUN8I_EMAC_RX_STRIP_FCS;
+	writel(v, priv->base + SUN8I_EMAC_RX_CTL0);
+
+	v = readl(priv->base + SUN8I_EMAC_TX_CTL1);
+	/* TX_MD Transmission starts after a full frame located in TX DMA FIFO*/
+	v |= BIT(1);
+	/* Undocumented bit (called TX_NEXT_FRM in BSP), the original comment is
+	 * "Operating on second frame increase the performance
+	 * especially when transmit store-and-forward is used."
+	 */
+	v |= BIT(2);
+	writel(v, priv->base + SUN8I_EMAC_TX_CTL1);
+
+	v = readl(priv->base + SUN8I_EMAC_RX_CTL1);
+	/* RX_MD RX DMA reads data from RX DMA FIFO to host memory after a
+	 * complete frame has been written to RX DMA FIFO
+	*/
+	v |= BIT(1);
+	writel(v, priv->base + SUN8I_EMAC_RX_CTL1);
+
+	sun8i_emac_set_macaddr(priv, ndev->dev_addr, 0);
+
+	err = sun8i_emac_alloc_rings(ndev);
+	if (err) {
+		netdev_err(ndev, "Fail to allocate rings\n");
+		goto err_mdio;
+	}
+
+	phy_start(ndev->phydev);
+
+	sun8i_emac_start_rx(ndev);
+	sun8i_emac_start_tx(ndev);
+
+	netif_napi_add(ndev, &priv->napi, sun8i_emac_poll, 64);
+	napi_enable(&priv->napi);
+	netif_start_queue(ndev);
+
+	return 0;
+err_mdio:
+	phy_disconnect(ndev->phydev);
+err_syscon:
+	sun8i_emac_unset_syscon(ndev);
+err_irq:
+	free_irq(priv->irq, ndev);
+	return err;
+}
+
+/* Clean the TX ring of any accepted skb for xmit */
+static void sun8i_emac_tx_clean(struct net_device *ndev)
+{
+	struct sun8i_emac_priv *priv = netdev_priv(ndev);
+	int i;
+	struct dma_desc *ddesc;
+	int frame_len;
+
+	spin_lock(&priv->tx_lock);
+
+	for (i = 0; i < priv->nbdesc_tx; i++) {
+		if (priv->txl[i].skb) {
+			ddesc = priv->dd_tx + i;
+			frame_len = ddesc->ctl & 0x3FFF;
+			switch (priv->txl[i].map) {
+			case MAP_SINGLE:
+				dma_unmap_single(priv->dev, ddesc->buf_addr,
+						 frame_len, DMA_TO_DEVICE);
+				break;
+			case MAP_PAGE:
+				dma_unmap_page(priv->dev, ddesc->buf_addr,
+					       frame_len, DMA_TO_DEVICE);
+				break;
+			default:
+				dev_err(priv->dev, "Trying to free an empty slot\n");
+				continue;
+			}
+			dev_kfree_skb_any(priv->txl[i].skb);
+			priv->txl[i].skb = NULL;
+			ddesc->ctl = 0;
+			ddesc->status = DCLEAN;
+		}
+	}
+	priv->tx_slot = 0;
+	priv->tx_dirty = 0;
+
+	spin_unlock(&priv->tx_lock);
+}
+
+/* Clean the RX ring */
+static void sun8i_emac_rx_clean(struct net_device *ndev)
+{
+	struct sun8i_emac_priv *priv = netdev_priv(ndev);
+	int i;
+	struct dma_desc *ddesc;
+
+	/* clean RX ring */
+	for (i = 0; i < priv->nbdesc_rx; i++)
+		if (priv->rx_skb[i]) {
+			ddesc = priv->dd_rx + i;
+			dma_unmap_single(priv->dev, ddesc->buf_addr,
+					 DESC_BUF_MAX, DMA_FROM_DEVICE);
+			dev_kfree_skb_any(priv->rx_skb[i]);
+			priv->rx_skb[i] = NULL;
+		}
+}
+
+static int sun8i_emac_stop(struct net_device *ndev)
+{
+	struct sun8i_emac_priv *priv = netdev_priv(ndev);
+
+	napi_disable(&priv->napi);
+
+	sun8i_emac_stop_tx(ndev);
+	sun8i_emac_stop_rx(ndev);
+
+	phy_stop(ndev->phydev);
+	phy_disconnect(ndev->phydev);
+
+	sun8i_emac_mdio_unregister(ndev);
+
+	sun8i_emac_unset_syscon(ndev);
+
+	free_irq(priv->irq, ndev);
+
+	sun8i_emac_rx_clean(ndev);
+	sun8i_emac_tx_clean(ndev);
+
+	kfree(priv->rx_skb);
+	kfree(priv->txl);
+
+	dma_free_coherent(priv->dev, priv->nbdesc_rx * sizeof(struct dma_desc),
+			  priv->dd_rx, priv->dd_rx_phy);
+	dma_free_coherent(priv->dev, priv->nbdesc_tx * sizeof(struct dma_desc),
+			  priv->dd_tx, priv->dd_tx_phy);
+
+	return 0;
+}
+
+static netdev_tx_t sun8i_emac_xmit(struct sk_buff *skb, struct net_device *ndev)
+{
+	struct sun8i_emac_priv *priv = netdev_priv(ndev);
+	struct dma_desc *ddesc;
+	struct dma_desc *first;
+	int i = 0, rbd_first;
+	unsigned int len, fraglen, tlen;
+	u32 v;
+	int n;
+	int nf;
+	const skb_frag_t *frag;
+	int do_csum = 0;
+
+	if (skb_put_padto(skb, ETH_ZLEN))
+		return NETDEV_TX_OK;
+	len = skb_headlen(skb);
+
+	n = skb_shinfo(skb)->nr_frags;
+
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
+		do_csum = 1;
+		priv->estats.tx_hw_csum++;
+	}
+	netif_dbg(priv, tx_queued, ndev, "%s len=%u skblen=%u %x\n", __func__,
+		  len, skb->len,
+		  (skb->ip_summed == CHECKSUM_PARTIAL));
+
+	spin_lock(&priv->tx_lock);
+
+	/* check for contigous space
+	 * We need at least 1(skb->data) + n(numfrags) + 1(one clean slot)
+	 */
+	if (rb_tx_numfreedesc(ndev) < n + 2) {
+		dev_err_ratelimited(priv->dev, "BUG!: TX is full %d %d\n",
+				    priv->tx_dirty, priv->tx_slot);
+		netif_stop_queue(ndev);
+		spin_unlock(&priv->tx_lock);
+		return NETDEV_TX_BUSY;
+	}
+	i = priv->tx_slot;
+
+	ddesc = priv->dd_tx + i;
+	first = priv->dd_tx + i;
+	rbd_first = i;
+
+	priv->tx_slot = (i + 1 + n) % priv->nbdesc_tx;
+
+	ddesc->buf_addr = dma_map_single(priv->dev, skb->data, len,
+					 DMA_TO_DEVICE);
+	if (dma_mapping_error(priv->dev, ddesc->buf_addr)) {
+		dev_err(priv->dev, "ERROR: Cannot map buffer for DMA\n");
+		goto xmit_error;
+	}
+	priv->txl[i].map = MAP_SINGLE;
+	priv->txl[i].skb = skb;
+
+	tlen = len;
+	ddesc->ctl = len;
+	/* Undocumented bit that make it works
+	 * Without it, packets never be sent on H3 SoC
+	 */
+	ddesc->ctl |= SUN8I_EMAC_MAGIC_TX_BIT;
+	if (do_csum)
+		ddesc->ctl |= SUN8I_EMAC_TX_DO_CRC;
+
+	/* handle fragmented skb, one descriptor per fragment  */
+	for (nf = 0; nf < n; nf++) {
+		frag = &skb_shinfo(skb)->frags[nf];
+		rb_inc(&i, priv->nbdesc_tx);
+		priv->txl[i].skb = skb;
+		ddesc = priv->dd_tx + i;
+		fraglen = skb_frag_size(frag);
+		ddesc->ctl = fraglen;
+		tlen += fraglen,
+		ddesc->ctl |= SUN8I_EMAC_MAGIC_TX_BIT;
+		if (do_csum)
+			ddesc->ctl |= SUN8I_EMAC_TX_DO_CRC;
+
+		ddesc->buf_addr = skb_frag_dma_map(priv->dev, frag, 0,
+				fraglen, DMA_TO_DEVICE);
+		if (dma_mapping_error(priv->dev, ddesc->buf_addr)) {
+			dev_err(priv->dev, "Cannot map buffer for DMA\n");
+			goto xmit_error;
+		}
+		priv->txl[i].map = MAP_PAGE;
+		ddesc->status = SUN8I_COULD_BE_USED_BY_DMA;
+	}
+
+	/* frame end */
+	ddesc->ctl |= DSC_TX_LAST;
+	/* We want an interrupt after transmission */
+	ddesc->ctl |= SUN8I_EMAC_WANT_INT;
+
+	rb_inc(&i, priv->nbdesc_tx);
+
+	/* frame begin */
+	first->ctl |= DSC_TX_FIRST;
+	wmb();/* SUN8I_COULD_BE_USED_BY_DMA must be the last value written */
+	first->status = SUN8I_COULD_BE_USED_BY_DMA;
+	priv->tx_slot = i;
+
+	/* Trying to optimize this (recording DMA start/stop) seems
+	 * to lead to errors. So we always start DMA.
+	 */
+	v = readl(priv->base + SUN8I_EMAC_TX_CTL1);
+	v |= TX_DMA_START;
+	v |= TX_DMA_EN;
+	writel(v, priv->base + SUN8I_EMAC_TX_CTL1);
+
+	if (rb_tx_numfreedesc(ndev) < MAX_SKB_FRAGS + 1) {
+		netif_stop_queue(ndev);
+		priv->estats.tx_stop_queue++;
+	}
+	priv->estats.tx_used_desc = rb_tx_numfreedesc(ndev);
+	priv->ndev->stats.tx_packets++;
+	priv->ndev->stats.tx_bytes += tlen;
+
+	spin_unlock(&priv->tx_lock);
+
+	return NETDEV_TX_OK;
+
+xmit_error:
+	/* destroy skb and return TX OK Documentation/DMA-API-HOWTO.txt */
+	/* clean descritors from rbd_first to i */
+	ddesc->ctl = 0;
+	wmb(); /* setting to DCLEAN is the last value to be set */
+	ddesc->status = DCLEAN;
+	do {
+		ddesc = priv->dd_tx + rbd_first;
+		ddesc->ctl = 0;
+		wmb(); /* setting to DCLEAN is the last value to be set */
+		ddesc->status = DCLEAN;
+		rb_inc(&rbd_first, priv->nbdesc_tx);
+	} while (rbd_first != i);
+	spin_unlock(&priv->tx_lock);
+	dev_kfree_skb_any(skb);
+	return NETDEV_TX_OK;
+}
+
+static int sun8i_emac_change_mtu(struct net_device *ndev, int new_mtu)
+{
+	struct sun8i_emac_priv *priv = netdev_priv(ndev);
+	int max_mtu;
+
+	dev_info(priv->dev, "%s set MTU to %d\n", __func__, new_mtu);
+
+	if (netif_running(ndev)) {
+		dev_err(priv->dev, "%s: must be stopped to change its MTU\n",
+			ndev->name);
+		return -EBUSY;
+	}
+
+	max_mtu = SKB_MAX_HEAD(NET_SKB_PAD + NET_IP_ALIGN);
+
+	if ((new_mtu < 68) || (new_mtu > max_mtu)) {
+		dev_err(priv->dev, "%s: invalid MTU, max MTU is: %d\n",
+			ndev->name, max_mtu);
+		return -EINVAL;
+	}
+
+	ndev->mtu = new_mtu;
+	netdev_update_features(ndev);
+	return 0;
+}
+
+static netdev_features_t sun8i_emac_fix_features(struct net_device *ndev,
+						 netdev_features_t features)
+{
+	struct sun8i_emac_priv *priv = netdev_priv(ndev);
+
+	netif_dbg(priv, drv, ndev, "%s %llx\n", __func__, features);
+	return features;
+}
+
+static int sun8i_emac_set_features(struct net_device *ndev,
+				   netdev_features_t features)
+{
+	struct sun8i_emac_priv *priv = netdev_priv(ndev);
+	u32 v;
+
+	v = readl(priv->base + SUN8I_EMAC_BASIC_CTL0);
+	if (features & NETIF_F_LOOPBACK && netif_running(ndev)) {
+		netif_info(priv, hw, ndev, "Set loopback features");
+		v |= BIT(1);
+	} else {
+		netif_info(priv, hw, ndev, "Unset loopback features");
+		v &= ~BIT(1);
+	}
+	writel(v, priv->base + SUN8I_EMAC_BASIC_CTL0);
+
+	v = readl(priv->base + SUN8I_EMAC_RX_CTL0);
+	if (features & NETIF_F_RXCSUM) {
+		v |= SUN8I_EMAC_RX_DO_CRC;
+		netif_info(priv, hw, ndev, "Doing RX CRC check by hardware");
+	} else {
+		v &= ~SUN8I_EMAC_RX_DO_CRC;
+		netif_info(priv, hw, ndev, "No RX CRC check by hardware");
+	}
+	if (features & NETIF_F_RXFCS) {
+		v &= ~SUN8I_EMAC_RX_STRIP_FCS;
+		netif_info(priv, hw, ndev, "Keep FCS");
+	} else {
+		v |= SUN8I_EMAC_RX_STRIP_FCS;
+		netif_info(priv, hw, ndev, "Strip FCS");
+	}
+	writel(v, priv->base + SUN8I_EMAC_RX_CTL0);
+
+	netif_dbg(priv, drv, ndev, "%s %llx %x\n", __func__, features, v);
+
+	return 0;
+}
+
+static void sun8i_emac_set_rx_mode(struct net_device *ndev)
+{
+	struct sun8i_emac_priv *priv = netdev_priv(ndev);
+	u32 v = 0;
+	int i = 0;
+	struct netdev_hw_addr *ha;
+
+	/* Receive all multicast frames */
+	v |= BIT(16);
+	/* Receive all control frames */
+	v |= BIT(13);
+	if (ndev->flags & IFF_PROMISC)
+		v |= BIT(1);
+	if (netdev_uc_count(ndev) > 7) {
+		v |= BIT(1);
+	} else {
+		netdev_for_each_uc_addr(ha, ndev) {
+			i++;
+			sun8i_emac_set_macaddr(priv, ha->addr, i);
+		}
+	}
+	writel(v, priv->base + SUN8I_EMAC_RX_FRM_FLT);
+}
+
+static void sun8i_emac_tx_timeout(struct net_device *ndev)
+{
+	struct sun8i_emac_priv *priv = netdev_priv(ndev);
+
+	netdev_err(ndev, "%s\n", __func__);
+
+	sun8i_emac_stop_tx(ndev);
+
+	sun8i_emac_tx_clean(ndev);
+
+	/* write start of tx ring descriptor */
+	writel(priv->dd_tx_phy, priv->base + SUN8I_EMAC_TX_DESC_LIST);
+
+	sun8i_emac_start_tx(ndev);
+
+	netdev_reset_queue(ndev);
+
+	ndev->stats.tx_errors++;
+	netif_wake_queue(ndev);
+}
+
+static int sun8i_emac_ioctl(struct net_device *ndev, struct ifreq *rq, int cmd)
+{
+	struct phy_device *phydev = ndev->phydev;
+
+	if (!netif_running(ndev))
+		return -EINVAL;
+
+	if (!phydev)
+		return -ENODEV;
+
+	return phy_mii_ioctl(phydev, rq, cmd);
+}
+
+static int sun8i_emac_check_if_running(struct net_device *ndev)
+{
+	if (!netif_running(ndev))
+		return -EINVAL;
+	return 0;
+}
+
+static int sun8i_emac_get_sset_count(struct net_device *ndev, int sset)
+{
+	switch (sset) {
+	case ETH_SS_STATS:
+		return ARRAY_SIZE(estats_str);
+	}
+	return -EOPNOTSUPP;
+}
+
+static int sun8i_emac_ethtool_get_settings(struct net_device *ndev,
+					   struct ethtool_cmd *cmd)
+{
+	struct phy_device *phy = ndev->phydev;
+	struct sun8i_emac_priv *priv = netdev_priv(ndev);
+
+	if (!phy) {
+		netdev_err(ndev, "%s: %s: PHY is not registered\n",
+			   __func__, ndev->name);
+		return -ENODEV;
+	}
+
+	if (!netif_running(ndev)) {
+		dev_err(priv->dev, "interface disabled: we cannot track link speed / duplex setting\n");
+		return -EBUSY;
+	}
+
+	return phy_ethtool_gset(phy, cmd);
+}
+
+static int sun8i_emac_ethtool_set_settings(struct net_device *ndev,
+					   struct ethtool_cmd *cmd)
+{
+	struct phy_device *phy = ndev->phydev;
+
+	return phy_ethtool_sset(phy, cmd);
+}
+
+static void sun8i_emac_ethtool_getdrvinfo(struct net_device *ndev,
+					  struct ethtool_drvinfo *info)
+{
+	strlcpy(info->driver, "sun8i_emac", sizeof(info->driver));
+	strcpy(info->version, "00");
+	info->fw_version[0] = '\0';
+}
+
+static void sun8i_emac_ethtool_stats(struct net_device *ndev,
+				     struct ethtool_stats *dummy, u64 *data)
+{
+	struct sun8i_emac_priv *priv = netdev_priv(ndev);
+
+	memcpy(data, &priv->estats,
+	       sun8i_emac_get_sset_count(ndev, ETH_SS_STATS) * sizeof(u64));
+}
+
+static void sun8i_emac_ethtool_strings(struct net_device *dev, u32 stringset,
+				       u8 *buffer)
+{
+	switch (stringset) {
+	case ETH_SS_STATS:
+		memcpy(buffer, &estats_str, sizeof(estats_str));
+		break;
+	}
+}
+
+static u32 sun8i_emac_ethtool_getmsglevel(struct net_device *ndev)
+{
+	struct sun8i_emac_priv *priv = netdev_priv(ndev);
+
+	return priv->msg_enable;
+}
+
+static void sun8i_emac_ethtool_setmsglevel(struct net_device *ndev, u32 level)
+{
+	struct sun8i_emac_priv *priv = netdev_priv(ndev);
+
+	priv->msg_enable = level;
+}
+
+static void sun8i_emac_get_pauseparam(struct net_device *ndev,
+				      struct ethtool_pauseparam *pause)
+{
+	struct sun8i_emac_priv *priv = netdev_priv(ndev);
+
+	pause->rx_pause = 0;
+	pause->tx_pause = 0;
+	pause->autoneg = ndev->phydev->autoneg;
+
+	if (priv->flow_ctrl & FLOW_RX)
+		pause->rx_pause = 1;
+	if (priv->flow_ctrl & FLOW_TX)
+		pause->tx_pause = 1;
+}
+
+static int sun8i_emac_set_pauseparam(struct net_device *ndev,
+				     struct ethtool_pauseparam *pause)
+{
+	struct sun8i_emac_priv *priv = netdev_priv(ndev);
+	struct phy_device *phy = ndev->phydev;
+	int new_pause = 0;
+	int ret = 0;
+
+	if (pause->rx_pause)
+		new_pause |= FLOW_RX;
+	if (pause->tx_pause)
+		new_pause |= FLOW_TX;
+
+	priv->flow_ctrl = new_pause;
+	phy->autoneg = pause->autoneg;
+
+	if (phy->autoneg) {
+		if (netif_running(ndev))
+			ret = phy_start_aneg(phy);
+	} else {
+		sun8i_emac_flow_ctrl(priv, phy->duplex, priv->flow_ctrl);
+	}
+	return ret;
+}
+
+static void sun8i_emac_ethtool_get_ringparam(struct net_device *ndev,
+					     struct ethtool_ringparam *ring)
+{
+	struct sun8i_emac_priv *priv = netdev_priv(ndev);
+
+	ring->rx_pending = priv->nbdesc_rx;
+	ring->tx_pending = priv->nbdesc_tx;
+}
+
+static int sun8i_emac_ethtool_set_ringparam(struct net_device *ndev,
+					    struct ethtool_ringparam *ring)
+{
+	struct sun8i_emac_priv *priv = netdev_priv(ndev);
+	int err;
+
+	if (ring->rx_max_pending || ring->rx_mini_max_pending ||
+	    ring->rx_jumbo_max_pending || ring->rx_mini_pending ||
+	    ring->rx_jumbo_pending || ring->tx_max_pending)
+		return -EINVAL;
+
+	if (ring->tx_pending < MAX_SKB_FRAGS + 1) {
+		netdev_err(ndev, "The number of TX descriptors is too low");
+		return -EINVAL;
+	}
+
+	sun8i_emac_stop_tx(ndev);
+	sun8i_emac_stop_rx(ndev);
+
+	sun8i_emac_rx_clean(ndev);
+	sun8i_emac_tx_clean(ndev);
+
+	kfree(priv->rx_skb);
+	kfree(priv->txl);
+
+	dma_free_coherent(priv->dev, priv->nbdesc_rx * sizeof(struct dma_desc),
+			  priv->dd_rx, priv->dd_rx_phy);
+	dma_free_coherent(priv->dev, priv->nbdesc_tx * sizeof(struct dma_desc),
+			  priv->dd_tx, priv->dd_tx_phy);
+
+	priv->nbdesc_rx = ring->rx_pending;
+	priv->nbdesc_tx = ring->tx_pending;
+	err = sun8i_emac_alloc_rings(ndev);
+	if (err) {
+		/* Fatal error, we cannot re start */
+		netdev_err(ndev, "Fail to allocate rings\n");
+		return -EFAULT;
+	}
+
+	sun8i_emac_start_rx(ndev);
+	sun8i_emac_start_tx(ndev);
+
+	netif_start_queue(ndev);
+
+	netdev_info(ndev, "Ring Param settings: rx: %d, tx %d\n",
+		    ring->rx_pending, ring->tx_pending);
+	return 0;
+}
+
+static const struct ethtool_ops sun8i_emac_ethtool_ops = {
+	.begin = sun8i_emac_check_if_running,
+	.get_settings = sun8i_emac_ethtool_get_settings,
+	.set_settings = sun8i_emac_ethtool_set_settings,
+	.get_link = ethtool_op_get_link,
+	.get_pauseparam = sun8i_emac_get_pauseparam,
+	.set_pauseparam = sun8i_emac_set_pauseparam,
+	.get_ethtool_stats = sun8i_emac_ethtool_stats,
+	.get_strings = sun8i_emac_ethtool_strings,
+	.get_sset_count = sun8i_emac_get_sset_count,
+	.get_drvinfo = sun8i_emac_ethtool_getdrvinfo,
+	.get_msglevel = sun8i_emac_ethtool_getmsglevel,
+	.set_msglevel = sun8i_emac_ethtool_setmsglevel,
+	.get_ringparam = sun8i_emac_ethtool_get_ringparam,
+	.set_ringparam = sun8i_emac_ethtool_set_ringparam,
+};
+
+static const struct net_device_ops sun8i_emac_netdev_ops = {
+	.ndo_init = sun8i_emac_init,
+	.ndo_uninit = sun8i_emac_uninit,
+	.ndo_open = sun8i_emac_open,
+	.ndo_start_xmit = sun8i_emac_xmit,
+	.ndo_stop = sun8i_emac_stop,
+	.ndo_change_mtu = sun8i_emac_change_mtu,
+	.ndo_fix_features = sun8i_emac_fix_features,
+	.ndo_set_features = sun8i_emac_set_features,
+	.ndo_set_rx_mode = sun8i_emac_set_rx_mode,
+	.ndo_tx_timeout = sun8i_emac_tx_timeout,
+	.ndo_do_ioctl = sun8i_emac_ioctl,
+	.ndo_set_mac_address = eth_mac_addr,
+};
+
+static irqreturn_t sun8i_emac_dma_interrupt(int irq, void *dev_id)
+{
+	struct net_device *ndev = dev_id;
+	struct sun8i_emac_priv *priv = netdev_priv(ndev);
+	u32 v, u;
+
+	v = readl(priv->base + SUN8I_EMAC_INT_STA);
+
+	/* When this bit is asserted, a frame transmission is completed. */
+	if (v & BIT(0)) {
+		priv->estats.tx_int++;
+		writel(0, priv->base + SUN8I_EMAC_INT_EN);
+		napi_schedule(&priv->napi);
+	}
+
+	/* When this bit is asserted, the TX DMA FSM is stopped. */
+	if (v & BIT(1))
+		priv->estats.tx_dma_stop++;
+
+	/* When this asserted, the TX DMA can not acquire next TX descriptor
+	 * and TX DMA FSM is suspended.
+	*/
+	if (v & BIT(2))
+		priv->estats.tx_dma_ua++;
+
+	if (v & BIT(3))
+		netif_dbg(priv, intr, ndev, "Unhandled interrupt TX TIMEOUT\n");
+
+	if (v & BIT(4)) {
+		netif_dbg(priv, intr, ndev, "Unhandled interrupt TX underflow\n");
+		priv->estats.tx_underflow_int++;
+	}
+
+	/* When this bit asserted , the frame is transmitted to FIFO totally. */
+	if (v & BIT(5)) {
+		netif_dbg(priv, intr, ndev, "Unhandled interrupt TX_EARLY_INT\n");
+		priv->estats.tx_early_int++;
+	}
+
+	/* When this bit is asserted, a frame reception is completed  */
+	if (v & BIT(8)) {
+		priv->estats.rx_int++;
+		writel(0, priv->base + SUN8I_EMAC_INT_EN);
+		napi_schedule(&priv->napi);
+	}
+
+	/* When this asserted, the RX DMA can not acquire next TX descriptor
+	 * and RX DMA FSM is suspended.
+	*/
+	if (v & BIT(9)) {
+		u = readl(priv->base + SUN8I_EMAC_RX_CTL1);
+		netif_info(priv, intr, ndev, "Re-run RX DMA %x\n", u);
+		writel(u | RX_DMA_START, priv->base + SUN8I_EMAC_RX_CTL1);
+		priv->estats.rx_dma_ua++;
+	}
+
+	if (v & BIT(10)) {
+		netif_dbg(priv, intr, ndev, "Unhandled interrupt RX_DMA_STOPPED_INT\n");
+		priv->estats.rx_dma_stop++;
+	}
+	if (v & BIT(11))
+		netif_dbg(priv, intr, ndev, "Unhandled interrupt RX_TIMEOUT\n");
+	if (v & BIT(12))
+		netif_dbg(priv, intr, ndev, "Unhandled interrupt RX OVERFLOW\n");
+	if (v & BIT(13)) {
+		netif_dbg(priv, intr, ndev, "Unhandled interrupt RX EARLY\n");
+		priv->estats.rx_early_int++;
+	}
+	if (v & BIT(16))
+		netif_dbg(priv, intr, ndev, "Unhandled interrupt RGMII\n");
+
+	/* the datasheet state those register as read-only
+	 * but nothing work(freeze) without writing to it
+	 */
+	writel(v & 0x3FFF, priv->base + SUN8I_EMAC_INT_STA);
+
+	return IRQ_HANDLED;
+}
+
+static int sun8i_emac_probe(struct platform_device *pdev)
+{
+	struct device_node *node = pdev->dev.of_node;
+	struct sun8i_emac_priv *priv;
+	struct net_device *ndev;
+	struct resource *res;
+	int ret;
+
+	ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
+	if (ret) {
+		dev_err(&pdev->dev, "No suitable DMA available\n");
+		return ret;
+	}
+
+	ndev = alloc_etherdev(sizeof(*priv));
+	if (!ndev)
+		return -ENOMEM;
+
+	SET_NETDEV_DEV(ndev, &pdev->dev);
+	priv = netdev_priv(ndev);
+	platform_set_drvdata(pdev, ndev);
+
+	priv->variant = (enum emac_variant)of_device_get_match_data(&pdev->dev);
+	if (!priv->variant) {
+		dev_err(&pdev->dev, "Missing sun8i-emac variant\n");
+		return -EINVAL;
+	}
+
+	priv->phy_node = of_parse_phandle(node, "phy", 0);
+	if (!priv->phy_node) {
+		netdev_err(ndev, "No associated PHY\n");
+		return -ENODEV;
+	}
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	priv->base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(priv->base)) {
+		ret = PTR_ERR(priv->base);
+		dev_err(&pdev->dev, "Cannot request MMIO: %d\n", ret);
+		return ret;
+	}
+
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "syscon");
+	priv->syscon = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(priv->syscon)) {
+		ret = PTR_ERR(priv->syscon);
+		dev_err(&pdev->dev,
+			"Cannot map system control registers: %d\n", ret);
+		return ret;
+	}
+
+	priv->ahb_clk = devm_clk_get(&pdev->dev, "ahb");
+	if (IS_ERR(priv->ahb_clk)) {
+		ret = PTR_ERR(priv->ahb_clk);
+		dev_err(&pdev->dev, "Cannot get AHB clock err=%d\n", ret);
+		goto probe_err;
+	}
+
+	priv->rst = devm_reset_control_get_optional(&pdev->dev, "ahb");
+	if (IS_ERR(priv->rst)) {
+		ret = PTR_ERR(priv->rst);
+		if (ret == -EPROBE_DEFER)
+			return -EPROBE_DEFER;
+		dev_info(&pdev->dev, "No MAC reset control found %d\n", ret);
+		priv->rst = NULL;
+	}
+
+	if (priv->variant == H3_EMAC)
+		priv->use_internal_phy = of_property_read_bool(node,
+				"allwinner,use-internal-phy");
+
+	if (priv->use_internal_phy) {
+		priv->ephy_clk = devm_clk_get(&pdev->dev, "ephy");
+		if (IS_ERR(priv->ephy_clk)) {
+			ret = PTR_ERR(priv->ephy_clk);
+			dev_err(&pdev->dev, "Cannot get EPHY clock err=%d\n",
+				ret);
+			goto probe_err;
+		}
+
+		priv->rst_ephy = devm_reset_control_get_optional(&pdev->dev,
+								 "ephy");
+		if (IS_ERR(priv->rst_ephy)) {
+			ret = PTR_ERR(priv->rst_ephy);
+			if (ret == -EPROBE_DEFER)
+				goto probe_err;
+			dev_info(&pdev->dev,
+				 "No EPHY reset control found %d\n", ret);
+			priv->rst_ephy = NULL;
+		}
+	}
+
+	priv->irq = platform_get_irq(pdev, 0);
+	if (priv->irq < 0) {
+		ret = priv->irq;
+		dev_err(&pdev->dev, "Cannot claim IRQ: %d\n", ret);
+		goto probe_err;
+	}
+
+	spin_lock_init(&priv->tx_lock);
+
+	ndev->netdev_ops = &sun8i_emac_netdev_ops;
+	ndev->ethtool_ops = &sun8i_emac_ethtool_ops;
+
+	priv->ndev = ndev;
+	priv->dev = &pdev->dev;
+
+	ndev->hw_features = NETIF_F_SG | NETIF_F_HIGHDMA;
+	ndev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
+		NETIF_F_RXCSUM;
+	ndev->features |= ndev->hw_features;
+	ndev->hw_features |= NETIF_F_RXFCS;
+	ndev->hw_features |= NETIF_F_RXALL;
+	ndev->hw_features |= NETIF_F_LOOPBACK;
+	ndev->priv_flags |= IFF_UNICAST_FLT;
+
+	ndev->watchdog_timeo = msecs_to_jiffies(5000);
+	netif_carrier_off(ndev);
+
+	/* Benched on OPIPC with 100M, setting more than 256 does not give any
+	 * perf boost
+	 */
+	priv->nbdesc_rx = 128;
+	priv->nbdesc_tx = 256;
+
+	ret = register_netdev(ndev);
+	if (ret) {
+		dev_err(&pdev->dev, "ERROR: Register %s failed\n", ndev->name);
+		goto probe_err;
+	}
+
+	return 0;
+
+probe_err:
+	free_netdev(ndev);
+	return ret;
+}
+
+static int sun8i_emac_remove(struct platform_device *pdev)
+{
+	struct net_device *ndev = platform_get_drvdata(pdev);
+
+	unregister_netdev(ndev);
+	platform_set_drvdata(pdev, NULL);
+	free_netdev(ndev);
+
+	return 0;
+}
+
+static const struct of_device_id sun8i_emac_of_match_table[] = {
+	{ .compatible = "allwinner,sun8i-a83t-emac",
+	  .data = (void *)A83T_EMAC },
+	{ .compatible = "allwinner,sun8i-h3-emac",
+	  .data = (void *)H3_EMAC },
+	{ .compatible = "allwinner,sun50i-a64-emac",
+	  .data = (void *)A64_EMAC },
+	{}
+};
+MODULE_DEVICE_TABLE(of, sun8i_emac_of_match_table);
+
+static struct platform_driver sun8i_emac_driver = {
+	.probe          = sun8i_emac_probe,
+	.remove         = sun8i_emac_remove,
+	.driver         = {
+		.name           = "sun8i-emac",
+		.of_match_table	= sun8i_emac_of_match_table,
+	},
+};
+
+module_platform_driver(sun8i_emac_driver);
+
+MODULE_DESCRIPTION("sun8i Ethernet driver");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("LABBE Corentin <clabbe.montjoie@gmail.com");
diff --git a/drivers/net/ethernet/ti/davinci_mdio.c b/drivers/net/ethernet/ti/davinci_mdio.c
index 33df340..425856f 100644
--- a/drivers/net/ethernet/ti/davinci_mdio.c
+++ b/drivers/net/ethernet/ti/davinci_mdio.c
@@ -106,6 +106,10 @@ struct davinci_mdio_data {
 	u32		clk_div;
 };
 
+#if IS_ENABLED(CONFIG_OF)
+static void davinci_mdio_update_dt_from_phymask(u32 phy_mask);
+#endif
+
 static void davinci_mdio_init_clk(struct davinci_mdio_data *data)
 {
 	u32 mdio_in, div, mdio_out_khz, access_time;
@@ -171,6 +175,12 @@ static int davinci_mdio_reset(struct mii_bus *bus)
 		/* restrict mdio bus to live phys only */
 		dev_info(data->dev, "detected phy mask %x\n", ~phy_mask);
 		phy_mask = ~phy_mask;
+
+		#if IS_ENABLED(CONFIG_OF)
+		if (of_machine_is_compatible("ti,am335x-bone"))
+			davinci_mdio_update_dt_from_phymask(phy_mask);
+		#endif
+
 	} else {
 		/* desperately scan all phys */
 		dev_warn(data->dev, "no live phy, scanning all\n");
@@ -334,6 +344,93 @@ static int davinci_mdio_probe_dt(struct mdio_platform_data *data,
 
 	return 0;
 }
+static void davinci_mdio_update_dt_from_phymask(u32 phy_mask)
+{
+	int i, len, skip;
+	u32 addr;
+	__be32 *old_phy_p, *phy_id_p;
+	struct property *phy_id_property = NULL;
+	struct device_node *node_p, *slave_p;
+
+	addr = 0;
+
+	for (i = 0; i < PHY_MAX_ADDR; i++) {
+		if ((phy_mask & (1 << i)) == 0) {
+			addr = (u32) i;
+		break;
+		}
+	}
+
+	for_each_compatible_node(node_p, NULL, "ti,cpsw") {
+		for_each_node_by_name(slave_p, "slave") {
+
+#if IS_ENABLED(CONFIG_OF_OVERLAY)
+			skip = 1;
+			// Hack, the overlay fixup "slave" doesn't have phy-mode...
+			old_phy_p = (__be32 *) of_get_property(slave_p, "phy-mode", &len);
+
+			if (len != (sizeof(__be32 *) * 1))
+			{
+				skip = 0;
+			}
+
+			if (skip) {
+#endif
+
+			old_phy_p = (__be32 *) of_get_property(slave_p, "phy_id", &len);
+
+			if (len != (sizeof(__be32 *) * 2))
+				goto err_out;
+
+			if (old_phy_p) {
+
+				phy_id_property = kzalloc(sizeof(*phy_id_property), GFP_KERNEL);
+
+				if (! phy_id_property)
+					goto err_out;
+
+				phy_id_property->length = len;
+				phy_id_property->name = kstrdup("phy_id", GFP_KERNEL);
+				phy_id_property->value = kzalloc(len, GFP_KERNEL);
+
+				if (! phy_id_property->name)
+					goto err_out;
+
+				if (! phy_id_property->value)
+					goto err_out;
+
+				memcpy(phy_id_property->value, old_phy_p, len);
+
+				phy_id_p = (__be32 *) phy_id_property->value + 1;
+
+				*phy_id_p = cpu_to_be32(addr);
+
+				of_update_property(slave_p, phy_id_property);
+				pr_info("davinci_mdio: dt: updated phy_id[%d] from phy_mask[%x]\n", addr, phy_mask);
+
+				++addr;
+			}
+#if IS_ENABLED(CONFIG_OF_OVERLAY)
+		}
+#endif
+		}
+	}
+
+	return;
+
+err_out:
+
+	if (phy_id_property) {
+		if (phy_id_property->name)
+			kfree(phy_id_property->name);
+
+	if (phy_id_property->value)
+		kfree(phy_id_property->value);
+
+	if (phy_id_property)
+		kfree(phy_id_property);
+	}
+}
 #endif
 
 #if IS_ENABLED(CONFIG_OF)
diff --git a/drivers/of/Kconfig b/drivers/of/Kconfig
index ba7b034..7d4169e 100644
--- a/drivers/of/Kconfig
+++ b/drivers/of/Kconfig
@@ -112,4 +112,11 @@ config OF_OVERLAY
 config OF_NUMA
 	bool
 
+config OF_CONFIGFS
+	bool "Device Tree Overlay ConfigFS interface"
+	select CONFIGFS_FS
+	depends on OF_OVERLAY
+	help
+	  Enable a simple user-space driven DT overlay interface.
+
 endif # OF
diff --git a/drivers/of/Makefile b/drivers/of/Makefile
index d7efd9d..aa5ef9d 100644
--- a/drivers/of/Makefile
+++ b/drivers/of/Makefile
@@ -1,4 +1,5 @@
 obj-y = base.o device.o platform.o
+obj-$(CONFIG_OF_CONFIGFS) += configfs.o
 obj-$(CONFIG_OF_DYNAMIC) += dynamic.o
 obj-$(CONFIG_OF_FLATTREE) += fdt.o
 obj-$(CONFIG_OF_EARLY_FLATTREE) += fdt_address.o
diff --git a/drivers/of/base.c b/drivers/of/base.c
index a0bccb5..4e9df30 100644
--- a/drivers/of/base.c
+++ b/drivers/of/base.c
@@ -30,6 +30,7 @@
 #include <linux/slab.h>
 #include <linux/string.h>
 #include <linux/proc_fs.h>
+#include <linux/rhashtable.h>
 
 #include "of_private.h"
 
@@ -44,6 +45,18 @@ static const char *of_stdout_options;
 
 struct kset *of_kset;
 
+const struct rhashtable_params of_phandle_ht_params = {
+	.key_offset = offsetof(struct device_node, phandle), /* base offset */
+	.key_len = sizeof(phandle),
+	.head_offset = offsetof(struct device_node, ht_node),
+	.automatic_shrinking = true,
+};
+
+struct rhashtable *of_phandle_ht;
+
+/* default is false */
+bool of_phandle_ht_is_disabled;
+
 /*
  * Used to protect the of_aliases, to hold off addition of nodes to sysfs.
  * This mutex must be held whenever modifications are being made to the
@@ -163,13 +176,19 @@ int __of_add_property_sysfs(struct device_node *np, struct property *pp)
 	return rc;
 }
 
-int __of_attach_node_sysfs(struct device_node *np)
+int __of_attach_node_post(struct device_node *np)
 {
 	const char *name;
 	struct kobject *parent;
 	struct property *pp;
 	int rc;
 
+	if (of_phandle_ht_available()) {
+		rc = of_phandle_ht_insert(np);
+		WARN(rc, "insert to phandle hash fail @%s\n",
+				of_node_full_name(np));
+	}
+
 	if (!IS_ENABLED(CONFIG_SYSFS))
 		return 0;
 
@@ -201,6 +220,18 @@ int __of_attach_node_sysfs(struct device_node *np)
 void __init of_core_init(void)
 {
 	struct device_node *np;
+	int ret;
+
+	of_phandle_ht = kzalloc(sizeof(*of_phandle_ht), GFP_KERNEL);
+	if (!of_phandle_ht) {
+		pr_warn("devicetree: Failed to allocate hashtable\n");
+		return;
+	}
+	ret = rhashtable_init(of_phandle_ht, &of_phandle_ht_params);
+	if (ret) {
+		pr_warn("devicetree: Failed to initialize hashtable\n");
+		return;
+	}
 
 	/* Create the kset, and register existing nodes */
 	mutex_lock(&of_mutex);
@@ -211,12 +242,16 @@ void __init of_core_init(void)
 		return;
 	}
 	for_each_of_allnodes(np)
-		__of_attach_node_sysfs(np);
+		__of_attach_node_post(np);
 	mutex_unlock(&of_mutex);
 
 	/* Symlink in /proc as required by userspace ABI */
 	if (of_root)
 		proc_symlink("device-tree", NULL, "/sys/firmware/devicetree/base");
+
+	ret = of_overlay_init();
+	if (ret != 0)
+		pr_warn("of_init: of_overlay_init failed!\n");
 }
 
 static struct property *__of_find_property(const struct device_node *np,
@@ -1100,9 +1135,14 @@ struct device_node *of_find_node_by_phandle(phandle handle)
 		return NULL;
 
 	raw_spin_lock_irqsave(&devtree_lock, flags);
-	for_each_of_allnodes(np)
-		if (np->phandle == handle)
-			break;
+	/* when we're ready use the hash table (and not disabled) */
+	if (of_phandle_ht_available() && !of_phandle_ht_is_disabled)
+		np = of_phandle_ht_lookup(handle);
+	else { /* fallback */
+		for_each_of_allnodes(np)
+			if (np->phandle == handle)
+				break;
+	}
 	of_node_get(np);
 	raw_spin_unlock_irqrestore(&devtree_lock, flags);
 	return np;
diff --git b/drivers/of/configfs.c b/drivers/of/configfs.c
new file mode 100644
index 0000000..c7e999c
--- /dev/null
+++ b/drivers/of/configfs.c
@@ -0,0 +1,307 @@
+/*
+ * Configfs entries for device-tree
+ *
+ * Copyright (C) 2013 - Pantelis Antoniou <panto@antoniou-consulting.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/ctype.h>
+#include <linux/cpu.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_fdt.h>
+#include <linux/spinlock.h>
+#include <linux/sizes.h>
+#include <linux/slab.h>
+#include <linux/proc_fs.h>
+#include <linux/configfs.h>
+#include <linux/types.h>
+#include <linux/stat.h>
+#include <linux/limits.h>
+#include <linux/file.h>
+#include <linux/vmalloc.h>
+#include <linux/firmware.h>
+
+#include "of_private.h"
+
+struct cfs_overlay_item {
+	struct config_item	item;
+
+	char			path[PATH_MAX];
+
+	const struct firmware	*fw;
+	struct device_node	*overlay;
+	int			ov_id;
+
+	void			*dtbo;
+	int			dtbo_size;
+};
+
+static int create_overlay(struct cfs_overlay_item *overlay, void *blob)
+{
+	int err;
+
+	/* unflatten the tree */
+	of_fdt_unflatten_tree(blob, NULL, &overlay->overlay);
+	if (overlay->overlay == NULL) {
+		pr_err("%s: failed to unflatten tree\n", __func__);
+		err = -EINVAL;
+		goto out_err;
+	}
+	pr_debug("%s: unflattened OK\n", __func__);
+
+	/* mark it as detached */
+	of_node_set_flag(overlay->overlay, OF_DETACHED);
+
+	/* perform resolution */
+	err = of_resolve_phandles(overlay->overlay);
+	if (err != 0) {
+		pr_err("%s: Failed to resolve tree\n", __func__);
+		goto out_err;
+	}
+	pr_debug("%s: resolved OK\n", __func__);
+
+	err = of_overlay_create(overlay->overlay);
+	if (err < 0) {
+		pr_err("%s: Failed to create overlay (err=%d)\n",
+				__func__, err);
+		goto out_err;
+	}
+	overlay->ov_id = err;
+
+out_err:
+	return err;
+}
+
+static inline struct cfs_overlay_item *to_cfs_overlay_item(
+		struct config_item *item)
+{
+	return item ? container_of(item, struct cfs_overlay_item, item) : NULL;
+}
+
+static ssize_t cfs_overlay_item_path_show(struct config_item *item, char *page)
+{
+	return sprintf(page, "%s\n", to_cfs_overlay_item(item)->path);
+}
+
+static ssize_t cfs_overlay_item_path_store(struct config_item *item,
+		const char *page, size_t count)
+{
+	struct cfs_overlay_item *overlay = to_cfs_overlay_item(item);
+	const char *p = page;
+	char *s;
+	int err;
+
+	/* if it's set do not allow changes */
+	if (overlay->path[0] != '\0' || overlay->dtbo_size > 0)
+		return -EPERM;
+
+	/* copy to path buffer (and make sure it's always zero terminated */
+	count = snprintf(overlay->path, sizeof(overlay->path) - 1, "%s", p);
+	overlay->path[sizeof(overlay->path) - 1] = '\0';
+
+	/* strip trailing newlines */
+	s = overlay->path + strlen(overlay->path);
+	while (s > overlay->path && *--s == '\n')
+		*s = '\0';
+
+	pr_debug("%s: path is '%s'\n", __func__, overlay->path);
+
+	err = request_firmware(&overlay->fw, overlay->path, NULL);
+	if (err != 0)
+		goto out_err;
+
+	err = create_overlay(overlay, (void *)overlay->fw->data);
+	if (err < 0)
+		goto out_err;
+
+	return count;
+
+out_err:
+
+	release_firmware(overlay->fw);
+	overlay->fw = NULL;
+
+	overlay->path[0] = '\0';
+	return err;
+}
+
+static ssize_t cfs_overlay_item_status_show(struct config_item *item,
+		char *page)
+{
+	return sprintf(page, "%s\n", to_cfs_overlay_item(item)->ov_id >= 0 ?
+					"applied" : "unapplied");
+}
+
+CONFIGFS_ATTR(cfs_overlay_item_, path);
+CONFIGFS_ATTR_RO(cfs_overlay_item_, status);
+
+static struct configfs_attribute *cfs_overlay_attrs[] = {
+	&cfs_overlay_item_attr_path,
+	&cfs_overlay_item_attr_status,
+	NULL,
+};
+
+ssize_t cfs_overlay_item_dtbo_read(struct config_item *item, void *buf,
+		size_t max_count)
+{
+	struct cfs_overlay_item *overlay = to_cfs_overlay_item(item);
+
+	pr_debug("%s: buf=%p max_count=%u\n", __func__,
+			buf, max_count);
+
+	if (overlay->dtbo == NULL)
+		return 0;
+
+	/* copy if buffer provided */
+	if (buf != NULL) {
+		/* the buffer must be large enough */
+		if (overlay->dtbo_size > max_count)
+			return -ENOSPC;
+
+		memcpy(buf, overlay->dtbo, overlay->dtbo_size);
+	}
+
+	return overlay->dtbo_size;
+}
+
+ssize_t cfs_overlay_item_dtbo_write(struct config_item *item, const void *buf,
+		size_t count)
+{
+	struct cfs_overlay_item *overlay = to_cfs_overlay_item(item);
+	int err;
+
+	/* if it's set do not allow changes */
+	if (overlay->path[0] != '\0' || overlay->dtbo_size > 0)
+		return -EPERM;
+
+	/* copy the contents */
+	overlay->dtbo = kmemdup(buf, count, GFP_KERNEL);
+	if (overlay->dtbo == NULL)
+		return -ENOMEM;
+
+	overlay->dtbo_size = count;
+
+	err = create_overlay(overlay, overlay->dtbo);
+	if (err < 0)
+		goto out_err;
+
+	return count;
+
+out_err:
+	kfree(overlay->dtbo);
+	overlay->dtbo = NULL;
+	overlay->dtbo_size = 0;
+
+	return err;
+}
+
+CONFIGFS_BIN_ATTR(cfs_overlay_item_, dtbo, NULL, SZ_1M);
+
+static struct configfs_bin_attribute *cfs_overlay_bin_attrs[] = {
+	&cfs_overlay_item_attr_dtbo,
+	NULL,
+};
+
+static void cfs_overlay_release(struct config_item *item)
+{
+	struct cfs_overlay_item *overlay = to_cfs_overlay_item(item);
+
+	if (overlay->ov_id >= 0)
+		of_overlay_destroy(overlay->ov_id);
+	if (overlay->fw)
+		release_firmware(overlay->fw);
+	/* kfree with NULL is safe */
+	kfree(overlay->dtbo);
+	kfree(overlay);
+}
+
+static struct configfs_item_operations cfs_overlay_item_ops = {
+	.release		= cfs_overlay_release,
+};
+
+static struct config_item_type cfs_overlay_type = {
+	.ct_item_ops	= &cfs_overlay_item_ops,
+	.ct_attrs	= cfs_overlay_attrs,
+	.ct_bin_attrs	= cfs_overlay_bin_attrs,
+	.ct_owner	= THIS_MODULE,
+};
+
+static struct config_item *cfs_overlay_group_make_item(
+		struct config_group *group, const char *name)
+{
+	struct cfs_overlay_item *overlay;
+
+	overlay = kzalloc(sizeof(*overlay), GFP_KERNEL);
+	if (!overlay)
+		return ERR_PTR(-ENOMEM);
+	overlay->ov_id = -1;
+
+	config_item_init_type_name(&overlay->item, name, &cfs_overlay_type);
+	return &overlay->item;
+}
+
+static void cfs_overlay_group_drop_item(struct config_group *group,
+		struct config_item *item)
+{
+	struct cfs_overlay_item *overlay = to_cfs_overlay_item(item);
+
+	config_item_put(&overlay->item);
+}
+
+static struct configfs_group_operations overlays_ops = {
+	.make_item	= cfs_overlay_group_make_item,
+	.drop_item	= cfs_overlay_group_drop_item,
+};
+
+static struct config_item_type overlays_type = {
+	.ct_group_ops   = &overlays_ops,
+	.ct_owner       = THIS_MODULE,
+};
+
+static struct configfs_group_operations of_cfs_ops = {
+	/* empty - we don't allow anything to be created */
+};
+
+static struct config_item_type of_cfs_type = {
+	.ct_group_ops   = &of_cfs_ops,
+	.ct_owner       = THIS_MODULE,
+};
+
+struct config_group of_cfs_overlay_group;
+
+static struct configfs_subsystem of_cfs_subsys = {
+	.su_group = {
+		.cg_item = {
+			.ci_namebuf = "device-tree",
+			.ci_type = &of_cfs_type,
+		},
+	},
+	.su_mutex = __MUTEX_INITIALIZER(of_cfs_subsys.su_mutex),
+};
+
+static int __init of_cfs_init(void)
+{
+	int ret;
+
+	pr_info("%s\n", __func__);
+
+	config_group_init(&of_cfs_subsys.su_group);
+	config_group_init_type_name(&of_cfs_overlay_group, "overlays",
+			&overlays_type);
+	configfs_add_default_group(&of_cfs_overlay_group,
+			&of_cfs_subsys.su_group);
+
+	ret = configfs_register_subsystem(&of_cfs_subsys);
+	if (ret != 0) {
+		pr_err("%s: failed to register subsys\n", __func__);
+		goto out;
+	}
+	pr_info("%s: OK\n", __func__);
+out:
+	return ret;
+}
+late_initcall(of_cfs_init);
diff --git a/drivers/of/dynamic.c b/drivers/of/dynamic.c
index 888fdbc..2a89b17 100644
--- a/drivers/of/dynamic.c
+++ b/drivers/of/dynamic.c
@@ -13,6 +13,7 @@
 #include <linux/slab.h>
 #include <linux/string.h>
 #include <linux/proc_fs.h>
+#include <linux/rhashtable.h>
 
 #include "of_private.h"
 
@@ -43,9 +44,16 @@ void of_node_put(struct device_node *node)
 }
 EXPORT_SYMBOL(of_node_put);
 
-void __of_detach_node_sysfs(struct device_node *np)
+void __of_detach_node_post(struct device_node *np)
 {
 	struct property *pp;
+	int rc;
+
+	if (of_phandle_ht_available()) {
+		rc = of_phandle_ht_remove(np);
+		WARN(rc, "remove from phandle hash fail @%s\n",
+				of_node_full_name(np));
+	}
 
 	if (!IS_ENABLED(CONFIG_SYSFS))
 		return;
@@ -253,7 +261,7 @@ int of_attach_node(struct device_node *np)
 	__of_attach_node(np);
 	raw_spin_unlock_irqrestore(&devtree_lock, flags);
 
-	__of_attach_node_sysfs(np);
+	__of_attach_node_post(np);
 	mutex_unlock(&of_mutex);
 
 	of_reconfig_notify(OF_RECONFIG_ATTACH_NODE, &rd);
@@ -306,7 +314,7 @@ int of_detach_node(struct device_node *np)
 	__of_detach_node(np);
 	raw_spin_unlock_irqrestore(&devtree_lock, flags);
 
-	__of_detach_node_sysfs(np);
+	__of_detach_node_post(np);
 	mutex_unlock(&of_mutex);
 
 	of_reconfig_notify(OF_RECONFIG_DETACH_NODE, &rd);
@@ -397,8 +405,9 @@ struct property *__of_prop_dup(const struct property *prop, gfp_t allocflags)
 }
 
 /**
- * __of_node_dup() - Duplicate or create an empty device node dynamically.
- * @fmt: Format string (plus vargs) for new full name of the device node
+ * __of_node_dupv() - Duplicate or create an empty device node dynamically.
+ * @fmt: Format string for new full name of the device node
+ * @vargs: va_list containing the arugments for the node full name
  *
  * Create an device tree node, either by duplicating an empty node or by allocating
  * an empty one suitable for further modification.  The node data are
@@ -406,17 +415,15 @@ struct property *__of_prop_dup(const struct property *prop, gfp_t allocflags)
  * OF_DETACHED bits set. Returns the newly allocated node or NULL on out of
  * memory error.
  */
-struct device_node *__of_node_dup(const struct device_node *np, const char *fmt, ...)
+struct device_node *__of_node_dupv(const struct device_node *np,
+		const char *fmt, va_list vargs)
 {
-	va_list vargs;
 	struct device_node *node;
 
 	node = kzalloc(sizeof(*node), GFP_KERNEL);
 	if (!node)
 		return NULL;
-	va_start(vargs, fmt);
 	node->full_name = kvasprintf(GFP_KERNEL, fmt, vargs);
-	va_end(vargs);
 	if (!node->full_name) {
 		kfree(node);
 		return NULL;
@@ -448,6 +455,24 @@ struct device_node *__of_node_dup(const struct device_node *np, const char *fmt,
 	return NULL;
 }
 
+/**
+ * __of_node_dup() - Duplicate or create an empty device node dynamically.
+ * @fmt: Format string (plus vargs) for new full name of the device node
+ *
+ * See: __of_node_dupv()
+ */
+struct device_node *__of_node_dup(const struct device_node *np,
+		const char *fmt, ...)
+{
+	va_list vargs;
+	struct device_node *node;
+
+	va_start(vargs, fmt);
+	node = __of_node_dupv(np, fmt, vargs);
+	va_end(vargs);
+	return node;
+}
+
 static void __of_changeset_entry_destroy(struct of_changeset_entry *ce)
 {
 	of_node_put(ce->np);
@@ -614,10 +639,10 @@ static int __of_changeset_entry_apply(struct of_changeset_entry *ce)
 
 	switch (ce->action) {
 	case OF_RECONFIG_ATTACH_NODE:
-		__of_attach_node_sysfs(ce->np);
+		__of_attach_node_post(ce->np);
 		break;
 	case OF_RECONFIG_DETACH_NODE:
-		__of_detach_node_sysfs(ce->np);
+		__of_detach_node_post(ce->np);
 		break;
 	case OF_RECONFIG_ADD_PROPERTY:
 		/* ignore duplicate names */
@@ -813,3 +838,295 @@ int of_changeset_action(struct of_changeset *ocs, unsigned long action,
 	return 0;
 }
 EXPORT_SYMBOL_GPL(of_changeset_action);
+
+/* changeset helpers */
+
+/**
+ * of_changeset_create_device_node - Create an empty device node
+ *
+ * @ocs:	changeset pointer
+ * @parent:	parent device node
+ * @fmt:	format string for the node's full_name
+ * @args:	argument list for the format string
+ *
+ * Create an empty device node, marking it as detached and allocated.
+ *
+ * Returns a device node on success, an error encoded pointer otherwise
+ */
+struct device_node *of_changeset_create_device_nodev(
+	struct of_changeset *ocs, struct device_node *parent,
+	const char *fmt, va_list vargs)
+{
+	struct device_node *node;
+
+	node = __of_node_dupv(NULL, fmt, vargs);
+	if (!node)
+		return ERR_PTR(-ENOMEM);
+
+	node->parent = parent;
+	return node;
+}
+EXPORT_SYMBOL_GPL(of_changeset_create_device_nodev);
+
+/**
+ * of_changeset_create_device_node - Create an empty device node
+ *
+ * @ocs:	changeset pointer
+ * @parent:	parent device node
+ * @fmt:	Format string for the node's full_name
+ * ...		Arguments
+ *
+ * Create an empty device node, marking it as detached and allocated.
+ *
+ * Returns a device node on success, an error encoded pointer otherwise
+ */
+__printf(3, 4) struct device_node *
+of_changeset_create_device_node(struct of_changeset *ocs,
+	struct device_node *parent, const char *fmt, ...)
+{
+	va_list vargs;
+	struct device_node *node;
+
+	va_start(vargs, fmt);
+	node = of_changeset_create_device_nodev(ocs, parent, fmt, vargs);
+	va_end(vargs);
+	return node;
+}
+EXPORT_SYMBOL_GPL(of_changeset_create_device_node);
+
+/**
+ * __of_changeset_add_property_copy - Create/update a new property copying
+ *                                    name & value
+ *
+ * @ocs:	changeset pointer
+ * @np:		device node pointer
+ * @name:	name of the property
+ * @value:	pointer to the value data
+ * @length:	length of the value in bytes
+ * @update:	True on update operation
+ *
+ * Adds/updates a property to the changeset by making copies of the name & value
+ * entries. The @update parameter controls whether an add or update takes place.
+ *
+ * Returns zero on success, a negative error value otherwise.
+ */
+int __of_changeset_add_update_property_copy(struct of_changeset *ocs,
+		struct device_node *np, const char *name, const void *value,
+		int length, bool update)
+{
+	struct property *prop;
+	char *new_name;
+	void *new_value;
+	int ret = -ENOMEM;
+
+	prop = kzalloc(sizeof(*prop), GFP_KERNEL);
+	if (!prop)
+		return -ENOMEM;
+
+	new_name = kstrdup(name, GFP_KERNEL);
+	if (!new_name)
+		goto out_err;
+
+	/*
+	 * NOTE: There is no check for zero length value.
+	 * In case of a boolean property, this will allocate a value
+	 * of zero bytes. We do this to work around the use
+	 * of of_get_property() calls on boolean values.
+	 */
+	new_value = kmemdup(value, length, GFP_KERNEL);
+	if (!new_value)
+		goto out_err;
+
+	of_property_set_flag(prop, OF_DYNAMIC);
+
+	prop->name = new_name;
+	prop->value = new_value;
+	prop->length = length;
+
+	if (!update)
+		ret = of_changeset_add_property(ocs, np, prop);
+	else
+		ret = of_changeset_update_property(ocs, np, prop);
+
+	if (!ret)
+		return 0;
+
+out_err:
+	kfree(prop->value);
+	kfree(prop->name);
+	kfree(prop);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(__of_changeset_add_update_property_copy);
+
+/**
+ * of_changeset_add_property_stringf - Create a new formatted string property
+ *
+ * @ocs:	changeset pointer
+ * @np:		device node pointer
+ * @name:	name of the property
+ * @fmt:	format of string property
+ * ...		arguments of the format string
+ *
+ * Adds a string property to the changeset by making copies of the name
+ * and the formatted value.
+ *
+ * Returns zero on success, a negative error value otherwise.
+ */
+__printf(4, 5) int of_changeset_add_property_stringf(
+		struct of_changeset *ocs, struct device_node *np,
+		const char *name, const char *fmt, ...)
+{
+	va_list vargs;
+	int ret;
+
+	va_start(vargs, fmt);
+	ret = __of_changeset_add_update_property_stringv(ocs, np, name, fmt,
+			vargs, false);
+	va_end(vargs);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(of_changeset_add_property_stringf);
+
+/**
+ * of_changeset_update_property_stringf - Update formatted string property
+ *
+ * @ocs:	changeset pointer
+ * @np:		device node pointer
+ * @name:	name of the property
+ * @fmt:	format of string property
+ * ...		arguments of the format string
+ *
+ * Updates a string property to the changeset by making copies of the name
+ * and the formatted value.
+ *
+ * Returns zero on success, a negative error value otherwise.
+ */
+int of_changeset_update_property_stringf(
+	struct of_changeset *ocs, struct device_node *np,
+	const char *name, const char *fmt, ...)
+{
+	va_list vargs;
+	int ret;
+
+	va_start(vargs, fmt);
+	ret = __of_changeset_add_update_property_stringv(ocs, np, name, fmt,
+			vargs, true);
+	va_end(vargs);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(of_changeset_update_property_stringf);
+
+/**
+ * __of_changeset_add_update_property_string_list - Create/update a string
+ *                                                  list property
+ *
+ * @ocs:	changeset pointer
+ * @np:		device node pointer
+ * @name:	name of the property
+ * @strs:	pointer to the string list
+ * @count:	string count
+ * @update:	True on update operation
+ *
+ * Adds a string list property to the changeset.
+ *
+ * Returns zero on success, a negative error value otherwise.
+ */
+int __of_changeset_add_update_property_string_list(
+		struct of_changeset *ocs, struct device_node *np,
+		const char *name, const char **strs, int count, bool update)
+{
+	int total = 0, i, ret;
+	char *value, *s;
+
+	for (i = 0; i < count; i++) {
+		/* check if  it's NULL */
+		if (!strs[i])
+			return -EINVAL;
+		total += strlen(strs[i]) + 1;
+	}
+
+	value = kmalloc(total, GFP_KERNEL);
+	if (!value)
+		return -ENOMEM;
+
+	for (i = 0, s = value; i < count; i++) {
+		/* no need to check for NULL, check above */
+		strcpy(s, strs[i]);
+		s += strlen(strs[i]) + 1;
+	}
+
+	ret = __of_changeset_add_update_property_copy(ocs, np, name, value,
+			total, update);
+
+	kfree(value);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(__of_changeset_add_update_property_string_list);
+
+static struct device_node *
+__of_changeset_node_move_one(struct of_changeset *ocs,
+		struct device_node *np, struct device_node *new_parent)
+{
+	struct device_node *np2;
+	const char *unitname;
+	int err;
+
+	err = of_changeset_detach_node(ocs, np);
+	if (err)
+		return ERR_PTR(err);
+
+	unitname = strrchr(np->full_name, '/');
+	if (!unitname)
+		unitname = np->full_name;
+
+	np2 = __of_node_dup(np, "%s/%s",
+			new_parent->full_name, unitname);
+	if (!np2)
+		return ERR_PTR(-ENOMEM);
+	np2->parent = new_parent;
+
+	err = of_changeset_attach_node(ocs, np2);
+	if (err)
+		return ERR_PTR(err);
+
+	return np2;
+}
+
+/**
+ * of_changeset_node_move_to - Moves a subtree to a new place in
+ *                             the tree
+ *
+ * @ocs:	changeset pointer
+ * @np:		device node pointer to be moved
+ * @to:		device node of the new parent
+ *
+ * Moves a subtree to a new place in the tree.
+ * Note that a move is a safe operation because the phandles
+ * remain valid.
+ *
+ * Returns zero on success, a negative error value otherwise.
+ */
+int of_changeset_node_move(struct of_changeset *ocs,
+		struct device_node *np, struct device_node *new_parent)
+{
+	struct device_node *npc, *nppc;
+
+	/* move the root first */
+	nppc = __of_changeset_node_move_one(ocs, np, new_parent);
+	if (IS_ERR(nppc))
+		return PTR_ERR(nppc);
+
+	/* move the subtrees next */
+	for_each_child_of_node(np, npc) {
+		nppc = __of_changeset_node_move_one(ocs, npc, nppc);
+		if (IS_ERR(nppc)) {
+			of_node_put(npc);
+			return PTR_ERR(nppc);
+		}
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(of_changeset_node_move);
diff --git a/drivers/of/of_private.h b/drivers/of/of_private.h
index 18bbb45..2f906dd 100644
--- a/drivers/of/of_private.h
+++ b/drivers/of/of_private.h
@@ -79,9 +79,9 @@ extern void __of_update_property_sysfs(struct device_node *np,
 		struct property *newprop, struct property *oldprop);
 
 extern void __of_attach_node(struct device_node *np);
-extern int __of_attach_node_sysfs(struct device_node *np);
+extern int __of_attach_node_post(struct device_node *np);
 extern void __of_detach_node(struct device_node *np);
-extern void __of_detach_node_sysfs(struct device_node *np);
+extern void __of_detach_node_post(struct device_node *np);
 
 extern void __of_sysfs_remove_bin_file(struct device_node *np,
 				       struct property *prop);
@@ -95,4 +95,46 @@ extern void __of_sysfs_remove_bin_file(struct device_node *np,
 #define for_each_transaction_entry_reverse(_oft, _te) \
 	list_for_each_entry_reverse(_te, &(_oft)->te_list, node)
 
+#if defined(CONFIG_OF_OVERLAY)
+extern int of_overlay_init(void);
+#else
+static inline int of_overlay_init(void)
+{
+	return 0;
+}
+#endif
+
+extern const struct rhashtable_params of_phandle_ht_params;
+extern struct rhashtable *of_phandle_ht;
+
+/* for unittest use */
+extern bool of_phandle_ht_is_disabled;
+
+static inline bool of_phandle_ht_available(void)
+{
+	return of_phandle_ht != NULL;
+}
+
+static inline int of_phandle_ht_insert(struct device_node *np)
+{
+	if (!np || !np->phandle)
+		return 0;
+	return rhashtable_insert_fast(of_phandle_ht,
+		&np->ht_node, of_phandle_ht_params);
+}
+
+static inline int of_phandle_ht_remove(struct device_node *np)
+{
+	if (!np || !np->phandle)
+		return 0;
+	return rhashtable_remove_fast(of_phandle_ht,
+		&np->ht_node, of_phandle_ht_params);
+}
+
+static inline struct device_node *of_phandle_ht_lookup(phandle handle)
+{
+	return rhashtable_lookup_fast(of_phandle_ht,
+			&handle, of_phandle_ht_params);
+}
+
 #endif /* _LINUX_OF_PRIVATE_H */
diff --git a/drivers/of/overlay.c b/drivers/of/overlay.c
index 318dbb5..27e5c21 100644
--- a/drivers/of/overlay.c
+++ b/drivers/of/overlay.c
@@ -22,11 +22,28 @@
 #include <linux/slab.h>
 #include <linux/err.h>
 #include <linux/idr.h>
+#include <linux/sysfs.h>
+#include <linux/atomic.h>
 
 #include "of_private.h"
 
+/* fwd. decl */
+struct of_overlay;
+struct of_overlay_info;
+
+/* an attribute for each fragment */
+struct fragment_attribute {
+	struct attribute attr;
+	ssize_t (*show)(struct kobject *kobj, struct fragment_attribute *fattr,
+			char *buf);
+	ssize_t (*store)(struct kobject *kobj, struct fragment_attribute *fattr,
+			 const char *buf, size_t count);
+	struct of_overlay_info *ovinfo;
+};
+
 /**
  * struct of_overlay_info - Holds a single overlay info
+ * @info:	info node that contains the target and overlay
  * @target:	target of the overlay operation
  * @overlay:	pointer to the overlay contents node
  *
@@ -34,8 +51,13 @@
  * records.
  */
 struct of_overlay_info {
+	struct of_overlay *ov;
+	struct device_node *info;
 	struct device_node *target;
 	struct device_node *overlay;
+	struct attribute_group attr_group;
+	struct attribute *attrs[2];
+	struct fragment_attribute target_attr;
 };
 
 /**
@@ -52,11 +74,26 @@ struct of_overlay {
 	struct list_head node;
 	int count;
 	struct of_overlay_info *ovinfo_tab;
+	const struct attribute_group **attr_groups;
 	struct of_changeset cset;
+	struct kobject kobj;
+	int target_index;
+	struct device_node *target_root;
 };
 
+/* master enable switch; once set to 0 can't be re-enabled */
+static atomic_t ov_enable = ATOMIC_INIT(1);
+
+static int __init of_overlay_disable_setup(char *str __always_unused)
+{
+	atomic_set(&ov_enable, 0);
+	return 1;
+}
+__setup("of_overlay_disable", of_overlay_disable_setup);
+
 static int of_overlay_apply_one(struct of_overlay *ov,
 		struct device_node *target, const struct device_node *overlay);
+static int overlay_removal_is_ok(struct of_overlay *ov);
 
 static int of_overlay_apply_single_property(struct of_overlay *ov,
 		struct device_node *target, struct property *prop)
@@ -187,30 +224,92 @@ static int of_overlay_apply(struct of_overlay *ov)
 
 /*
  * Find the target node using a number of different strategies
- * in order of preference
+ * in order of preference. Respects the target index if available.
  *
  * "target" property containing the phandle of the target
  * "target-path" property containing the path of the target
  */
-static struct device_node *find_target_node(struct device_node *info_node)
+static struct device_node *find_target_node(struct of_overlay *ov,
+		struct device_node *info_node, int index)
 {
+	struct device_node *target = NULL, *np;
 	const char *path;
+	char *newpath;
 	u32 val;
 	int ret;
 
 	/* first try to go by using the target as a phandle */
-	ret = of_property_read_u32(info_node, "target", &val);
-	if (ret == 0)
-		return of_find_node_by_phandle(val);
+	ret = of_property_read_u32_index(info_node, "target", index, &val);
+	if (ret == 0) {
+		target = of_find_node_by_phandle(val);
+		if (!target) {
+			pr_err("%s: Could not find target phandle 0x%x\n",
+					__func__, val);
+			return NULL;
+		}
+		goto check_root;
+	}
 
-	/* now try to locate by path */
-	ret = of_property_read_string(info_node, "target-path", &path);
-	if (ret == 0)
-		return of_find_node_by_path(path);
+	/* failed, try to locate by path */
+	ret = of_property_read_string_index(info_node, "target-path", index,
+			&path);
+	if (ret == 0) {
+
+		if (!ov->target_root) {
+			target = of_find_node_by_path(path);
+			if (!target)
+				pr_err("%s: Could not find target path \"%s\"\n",
+						__func__, path);
+			return target;
+		}
+
+		/* remove preceding '/' from path; relative path */
+		if (*path == '/') {
+			while (*path == '/')
+				path++;
+
+			newpath = kasprintf(GFP_KERNEL, "%s%s%s",
+					of_node_full_name(ov->target_root),
+					*path ? "/" : "", path);
+			if (!newpath) {
+				pr_err("%s: Could not allocate \"%s%s%s\"\n",
+					__func__,
+					of_node_full_name(ov->target_root),
+					*path ? "/" : "", path);
+				return NULL;
+			}
+			target = of_find_node_by_path(newpath);
+			kfree(newpath);
 
-	pr_err("Failed to find target for node %p (%s)\n",
-		info_node, info_node->name);
+			return target;
 
+		}
+		/* target is an alias, need to check */
+		target = of_find_node_by_path(path);
+		if (!target) {
+			pr_err("%s: Could not find alias \"%s\"\n",
+					__func__, path);
+			return NULL;
+		}
+		goto check_root;
+	}
+
+	return NULL;
+
+check_root:
+	if (!ov->target_root)
+		return target;
+
+	/* got a target, but we have to check it's under target root */
+	for (np = target; np; np = np->parent) {
+		if (np == ov->target_root)
+			return target;
+	}
+	pr_err("%s: target \"%s\" not under target_root \"%s\"\n",
+			__func__, of_node_full_name(target),
+			of_node_full_name(ov->target_root));
+	/* target is not under target_root */
+	of_node_put(target);
 	return NULL;
 }
 
@@ -235,10 +334,12 @@ static int of_fill_overlay_info(struct of_overlay *ov,
 	if (ovinfo->overlay == NULL)
 		goto err_fail;
 
-	ovinfo->target = find_target_node(info_node);
+	ovinfo->target = find_target_node(ov, info_node, ov->target_index);
 	if (ovinfo->target == NULL)
 		goto err_fail;
 
+	ovinfo->info = of_node_get(info_node);
+
 	return 0;
 
 err_fail:
@@ -249,6 +350,17 @@ err_fail:
 	return -EINVAL;
 }
 
+static ssize_t target_show(struct kobject *kobj,
+		struct fragment_attribute *fattr, char *buf)
+{
+	struct of_overlay_info *ovinfo = fattr->ovinfo;
+
+	return snprintf(buf, PAGE_SIZE, "%s\n",
+			of_node_full_name(ovinfo->target));
+}
+
+static const struct fragment_attribute target_template_attr = __ATTR_RO(target);
+
 /**
  * of_build_overlay_info() - Build an overlay info array
  * @ov		Overlay to build
@@ -266,7 +378,7 @@ static int of_build_overlay_info(struct of_overlay *ov,
 {
 	struct device_node *node;
 	struct of_overlay_info *ovinfo;
-	int cnt, err;
+	int i, cnt, err;
 
 	/* worst case; every child is a node */
 	cnt = 0;
@@ -287,14 +399,45 @@ static int of_build_overlay_info(struct of_overlay *ov,
 
 	/* if nothing filled, return error */
 	if (cnt == 0) {
-		kfree(ovinfo);
-		return -ENODEV;
+		err = -ENODEV;
+		goto err_free_ovinfo;
 	}
 
 	ov->count = cnt;
 	ov->ovinfo_tab = ovinfo;
 
+	ov->attr_groups = kcalloc(cnt + 1,
+			sizeof(struct attribute_group *), GFP_KERNEL);
+	if (ov->attr_groups == NULL) {
+		err = -ENOMEM;
+		goto err_free_ovinfo;
+	}
+
+	for (i = 0; i < cnt; i++) {
+		ovinfo = &ov->ovinfo_tab[i];
+
+		ov->attr_groups[i] = &ovinfo->attr_group;
+
+		ovinfo->target_attr = target_template_attr;
+		/* make lockdep happy */
+		sysfs_attr_init(&ovinfo->target_attr.attr);
+		ovinfo->target_attr.ovinfo = ovinfo;
+
+		ovinfo->attrs[0] = &ovinfo->target_attr.attr;
+		ovinfo->attrs[1] = NULL;
+
+		/* NOTE: direct reference to the full_name */
+		ovinfo->attr_group.name = kbasename(ovinfo->info->full_name);
+		ovinfo->attr_group.attrs = ovinfo->attrs;
+
+	}
+	ov->attr_groups[i] = NULL;
+
 	return 0;
+
+err_free_ovinfo:
+	kfree(ovinfo);
+	return err;
 }
 
 /**
@@ -311,46 +454,201 @@ static int of_free_overlay_info(struct of_overlay *ov)
 	struct of_overlay_info *ovinfo;
 	int i;
 
+	/* free attribute groups space */
+	kfree(ov->attr_groups);
+
 	/* do it in reverse */
 	for (i = ov->count - 1; i >= 0; i--) {
 		ovinfo = &ov->ovinfo_tab[i];
 
 		of_node_put(ovinfo->target);
 		of_node_put(ovinfo->overlay);
+		of_node_put(ovinfo->info);
 	}
 	kfree(ov->ovinfo_tab);
 
 	return 0;
 }
 
+static int of_overlay_add_symbols(
+		struct device_node *tree,
+		struct of_overlay *ov)
+{
+	struct of_overlay_info *ovinfo;
+	struct device_node *root_sym = NULL;
+	struct device_node *child = NULL;
+	struct property *prop;
+	const char *path, *s;
+	char *new_path;
+	int i, len, err;
+
+	/* both may fail (if no fixups are required) */
+	root_sym = of_find_node_by_path("/__symbols__");
+	child = of_get_child_by_name(tree, "__symbols__");
+
+	err = 0;
+	/* do nothing if either is NULL */
+	if (!root_sym || !child)
+		goto out;
+
+	for_each_property_of_node(child, prop) {
+
+		/* skip properties added automatically */
+		if (of_prop_cmp(prop->name, "name") == 0)
+			continue;
+
+		err = of_property_read_string(child,
+				prop->name, &path);
+		if (err != 0) {
+			pr_err("Could not find symbol '%s'\n", prop->name);
+			continue;
+		}
+
+		/* now find fragment index */
+		s = path;
+
+		/* compare paths to find fragment index */
+		for (i = 0, ovinfo = NULL, len = -1; i < ov->count; i++) {
+			ovinfo = &ov->ovinfo_tab[i];
+
+			pr_debug("#%d: overlay->name=%s target->name=%s\n",
+					i, ovinfo->overlay->full_name,
+					ovinfo->target->full_name);
+
+			len = strlen(ovinfo->overlay->full_name);
+			if (strncasecmp(path, ovinfo->overlay->full_name,
+						len) == 0 && path[len] == '/')
+				break;
+		}
+
+		if (i >= ov->count)
+			continue;
+
+		pr_debug("found target at #%d\n", i);
+		new_path = kasprintf(GFP_KERNEL, "%s%s",
+				ovinfo->target->full_name,
+				path + len);
+		if (!new_path) {
+			pr_err("Failed to allocate propname for \"%s\"\n",
+					prop->name);
+			err = -ENOMEM;
+			break;
+		}
+
+		err = of_changeset_add_property_string(&ov->cset, root_sym,
+				prop->name, new_path);
+
+		/* free always */
+		kfree(new_path);
+
+		if (err) {
+			pr_err("Failed to add property for \"%s\"\n",
+					prop->name);
+			break;
+		}
+	}
+
+out:
+	of_node_put(child);
+	of_node_put(root_sym);
+
+	return err;
+}
+
 static LIST_HEAD(ov_list);
 static DEFINE_IDR(ov_idr);
 
-/**
- * of_overlay_create() - Create and apply an overlay
- * @tree:	Device node containing all the overlays
- *
- * Creates and applies an overlay while also keeping track
- * of the overlay in a list. This list can be used to prevent
- * illegal overlay removals.
- *
- * Returns the id of the created overlay, or a negative error number
- */
-int of_overlay_create(struct device_node *tree)
+static inline struct of_overlay *kobj_to_overlay(struct kobject *kobj)
+{
+	return container_of(kobj, struct of_overlay, kobj);
+}
+
+void of_overlay_release(struct kobject *kobj)
+{
+	struct of_overlay *ov = kobj_to_overlay(kobj);
+
+	of_node_put(ov->target_root);
+	kfree(ov);
+}
+
+static ssize_t enable_show(struct kobject *kobj,
+		struct kobj_attribute *attr, char *buf)
+{
+	return snprintf(buf, PAGE_SIZE, "%d\n", atomic_read(&ov_enable));
+}
+
+static ssize_t enable_store(struct kobject *kobj,
+		struct kobj_attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+	bool new_enable;
+
+	ret = strtobool(buf, &new_enable);
+	if (ret != 0)
+		return ret;
+	/* if we've disabled it, no going back */
+	if (atomic_read(&ov_enable) == 0)
+		return -EPERM;
+	atomic_set(&ov_enable, (int)new_enable);
+	return count;
+}
+
+static struct kobj_attribute enable_attr = __ATTR_RW(enable);
+
+static const struct attribute *overlay_global_attrs[] = {
+	&enable_attr.attr,
+	NULL
+};
+
+static ssize_t can_remove_show(struct kobject *kobj,
+		struct kobj_attribute *attr, char *buf)
+{
+	struct of_overlay *ov = kobj_to_overlay(kobj);
+
+	return snprintf(buf, PAGE_SIZE, "%d\n", overlay_removal_is_ok(ov));
+}
+
+static struct kobj_attribute can_remove_attr = __ATTR_RO(can_remove);
+
+static struct attribute *overlay_attrs[] = {
+	&can_remove_attr.attr,
+	NULL
+};
+
+static struct kobj_type of_overlay_ktype = {
+	.release = of_overlay_release,
+	.sysfs_ops = &kobj_sysfs_ops,	/* default kobj sysfs ops */
+	.default_attrs = overlay_attrs,
+};
+
+static struct kset *ov_kset;
+
+static int __of_overlay_create(struct device_node *tree,
+		int target_index, struct device_node *target_root)
 {
 	struct of_overlay *ov;
 	int err, id;
 
+	/* administratively disabled */
+	if (!atomic_read(&ov_enable))
+		return -EPERM;
+
 	/* allocate the overlay structure */
 	ov = kzalloc(sizeof(*ov), GFP_KERNEL);
 	if (ov == NULL)
 		return -ENOMEM;
 	ov->id = -1;
 
+	ov->target_index = target_index;
+	ov->target_root = of_node_get(target_root);
+
 	INIT_LIST_HEAD(&ov->node);
 
 	of_changeset_init(&ov->cset);
 
+	/* initialize kobject */
+	kobject_init(&ov->kobj, &of_overlay_ktype);
+
 	mutex_lock(&of_mutex);
 
 	id = idr_alloc(&ov_idr, ov, 0, 0, GFP_KERNEL);
@@ -373,19 +671,44 @@ int of_overlay_create(struct device_node *tree)
 	if (err)
 		goto err_abort_trans;
 
+	err = of_overlay_add_symbols(tree, ov);
+	if (err) {
+		pr_err("%s: of_overlay_add_symbols() failed for tree@%s\n",
+				__func__, tree->full_name);
+		goto err_abort_trans;
+	}
+
 	/* apply the changeset */
 	err = __of_changeset_apply(&ov->cset);
 	if (err)
 		goto err_revert_overlay;
 
 
+	ov->kobj.kset = ov_kset;
+	err = kobject_add(&ov->kobj, NULL, "%d", id);
+	if (err != 0) {
+		pr_err("%s: kobject_add() failed for tree@%s\n",
+				__func__, tree->full_name);
+		goto err_cancel_overlay;
+	}
+
+	err = sysfs_create_groups(&ov->kobj, ov->attr_groups);
+	if (err != 0) {
+		pr_err("%s: sysfs_create_groups() failed for tree@%s\n",
+				__func__, tree->full_name);
+		goto err_remove_kobj;
+	}
+
 	/* add to the tail of the overlay list */
 	list_add_tail(&ov->node, &ov_list);
 
 	mutex_unlock(&of_mutex);
 
 	return id;
-
+err_remove_kobj:
+	kobject_put(&ov->kobj);
+err_cancel_overlay:
+	of_changeset_revert(&ov->cset);
 err_revert_overlay:
 err_abort_trans:
 	of_free_overlay_info(ov);
@@ -393,13 +716,66 @@ err_free_idr:
 	idr_remove(&ov_idr, ov->id);
 err_destroy_trans:
 	of_changeset_destroy(&ov->cset);
+	of_node_put(ov->target_root);
 	kfree(ov);
 	mutex_unlock(&of_mutex);
 
 	return err;
 }
+
+/**
+ * of_overlay_create() - Create and apply an overlay
+ * @tree:	Device node containing all the overlays
+ *
+ * Creates and applies an overlay while also keeping track
+ * of the overlay in a list. This list can be used to prevent
+ * illegal overlay removals.
+ *
+ * Returns the id of the created overlay, or a negative error number
+ */
+int of_overlay_create(struct device_node *tree)
+{
+	return __of_overlay_create(tree, 0, NULL);
+}
 EXPORT_SYMBOL_GPL(of_overlay_create);
 
+/**
+ * of_overlay_create_target_index() - Create and apply an overlay
+ * @tree:	Device node containing all the overlays
+ * @index:	Index to use in the target properties
+ *
+ * Creates and applies an overlay while also keeping track
+ * of the overlay in a list. This list can be used to prevent
+ * illegal overlay removals.
+ *
+ * Returns the id of the created overlay, or a negative error number
+ */
+int of_overlay_create_target_index(struct device_node *tree, int index)
+{
+	return __of_overlay_create(tree, index, NULL);
+}
+EXPORT_SYMBOL_GPL(of_overlay_create_target_index);
+
+/**
+ * of_overlay_create_target_root() - Create and apply an overlay
+ *			under which will be limited to target_root
+ * @tree:		Device node containing all the overlays
+ * @target_root:	Target root for the overlay.
+ *
+ * Creates and applies an overlay while also keeping track
+ * of the overlay in a list. This list can be used to prevent
+ * illegal overlay removals. The overlay is only allowed to
+ * target nodes under the target_root node.
+ *
+ * Returns the id of the created overlay, or an negative error number
+ */
+int of_overlay_create_target_root(struct device_node *tree,
+		struct device_node *target_root)
+{
+	return __of_overlay_create(tree, 0, target_root);
+}
+EXPORT_SYMBOL_GPL(of_overlay_create_target_root);
+
 /* check whether the given node, lies under the given tree */
 static int overlay_subtree_check(struct device_node *tree,
 		struct device_node *dn)
@@ -500,11 +876,13 @@ int of_overlay_destroy(int id)
 
 
 	list_del(&ov->node);
+	sysfs_remove_groups(&ov->kobj, ov->attr_groups);
 	__of_changeset_revert(&ov->cset);
 	of_free_overlay_info(ov);
 	idr_remove(&ov_idr, id);
 	of_changeset_destroy(&ov->cset);
-	kfree(ov);
+
+	kobject_put(&ov->kobj);
 
 	err = 0;
 
@@ -534,7 +912,7 @@ int of_overlay_destroy_all(void)
 		__of_changeset_revert(&ov->cset);
 		of_free_overlay_info(ov);
 		idr_remove(&ov_idr, ov->id);
-		kfree(ov);
+		kobject_put(&ov->kobj);
 	}
 
 	mutex_unlock(&of_mutex);
@@ -542,3 +920,18 @@ int of_overlay_destroy_all(void)
 	return 0;
 }
 EXPORT_SYMBOL_GPL(of_overlay_destroy_all);
+
+/* called from of_init() */
+int of_overlay_init(void)
+{
+	int rc;
+
+	ov_kset = kset_create_and_add("overlays", NULL, &of_kset->kobj);
+	if (!ov_kset)
+		return -ENOMEM;
+
+	rc = sysfs_create_files(&ov_kset->kobj, overlay_global_attrs);
+	WARN(rc, "%s: error adding global attributes\n", __func__);
+
+	return rc;
+}
diff --git a/drivers/of/unittest-data/testcases.dts b/drivers/of/unittest-data/testcases.dts
index 12f7c3d..a6ded1b 100644
--- a/drivers/of/unittest-data/testcases.dts
+++ b/drivers/of/unittest-data/testcases.dts
@@ -75,5 +75,15 @@
 				target = <0x00000000>;
 			};
 		};
+		overlay16 {
+			fragment@0 {
+				target = <0x00000000 0x00000004>;
+			};
+		};
+		overlay18 {
+			fragment@0 {
+				target = <0x00000000>;
+			};
+		};
 	};
 }; };
diff --git a/drivers/of/unittest-data/tests-overlay.dtsi b/drivers/of/unittest-data/tests-overlay.dtsi
index 02ba56c..170b04d 100644
--- a/drivers/of/unittest-data/tests-overlay.dtsi
+++ b/drivers/of/unittest-data/tests-overlay.dtsi
@@ -110,6 +110,30 @@
 						};
 					};
 				};
+
+				unittest16: test-unittest16 {
+					compatible = "unittest";
+					status = "disabled";
+					reg = <16>;
+				};
+
+				unittest17: test-unittest17 {
+					compatible = "unittest";
+					status = "disabled";
+					reg = <17>;
+				};
+
+				unittest18: test-unittest18 {
+					compatible = "unittest";
+					status = "disabled";
+					reg = <18>;
+				};
+
+				unittest19: test-unittest19 {
+					compatible = "unittest";
+					status = "disabled";
+					reg = <19>;
+				};
 			};
 		};
 
@@ -325,5 +349,44 @@
 			};
 		};
 
+		/* test enable using indirect functionality */
+		overlay16 {
+			fragment@0 {
+				target = <&unittest17>, <&unittest16>;
+				__overlay__ {
+					status = "okay";
+				};
+			};
+		};
+
+		/* test enable using target root (relative path) */
+		overlay17 {
+			fragment@0 {
+				target-path = "/";
+				__overlay__ {
+					status = "okay";
+				};
+			};
+		};
+
+		/* test enable using target phandle */
+		overlay18 {
+			fragment@0 {
+				target = <&unittest18>;
+				__overlay__ {
+					status = "okay";
+				};
+			};
+		};
+
+		/* test trying to enable out of root (should fail) */
+		overlay19 {
+			fragment@0 {
+				target = <&unittest19>;
+				__overlay__ {
+					status = "okay";
+				};
+			};
+		};
 	};
 };
diff --git a/drivers/of/unittest.c b/drivers/of/unittest.c
index 53c83d6..b560ae7 100644
--- a/drivers/of/unittest.c
+++ b/drivers/of/unittest.c
@@ -24,6 +24,9 @@
 
 #include <linux/bitops.h>
 
+#include <linux/timekeeping.h>
+#include <linux/random.h>
+
 #include "of_private.h"
 
 static struct unittest_results {
@@ -542,6 +545,59 @@ static void __init of_unittest_changeset(void)
 #endif
 }
 
+static void __init of_unittest_changeset_helper(void)
+{
+#ifdef CONFIG_OF_DYNAMIC
+	struct device_node *n1, *n2, *n21, *parent, *np;
+	struct of_changeset chgset;
+
+	of_changeset_init(&chgset);
+
+	parent = of_find_node_by_path("/testcase-data/changeset");
+
+	unittest(parent, "testcase setup failure\n");
+	n1 = of_changeset_create_device_node(&chgset,
+			parent, "/testcase-data/changeset/n1");
+	unittest(n1, "testcase setup failure\n");
+	n2 = of_changeset_create_device_node(&chgset,
+			parent, "/testcase-data/changeset/n2");
+	unittest(n2, "testcase setup failure\n");
+	n21 = of_changeset_create_device_node(&chgset, n2, "%s/%s",
+			"/testcase-data/changeset/n2", "n21");
+	unittest(n21, "testcase setup failure\n");
+
+	unittest(!of_changeset_add_property_string(&chgset, parent,
+				"prop-add", "foo"), "fail add prop\n");
+
+	unittest(!of_changeset_attach_node(&chgset, n1), "fail n1 attach\n");
+	unittest(!of_changeset_attach_node(&chgset, n2), "fail n2 attach\n");
+	unittest(!of_changeset_attach_node(&chgset, n21), "fail n21 attach\n");
+
+	unittest(!of_changeset_apply(&chgset), "apply failed\n");
+
+	/* Make sure node names are constructed correctly */
+	np = of_find_node_by_path("/testcase-data/changeset/n1");
+	unittest(np, "'%s' not added\n", n1->full_name);
+	of_node_put(np);
+
+	/* Make sure node names are constructed correctly */
+	np = of_find_node_by_path("/testcase-data/changeset/n2");
+	unittest(np, "'%s' not added\n", n2->full_name);
+	of_node_put(np);
+
+	np = of_find_node_by_path("/testcase-data/changeset/n2/n21");
+	unittest(np, "'%s' not added\n", n21->full_name);
+	of_node_put(np);
+
+	unittest(!of_changeset_revert(&chgset), "revert failed\n");
+
+	of_changeset_destroy(&chgset);
+
+	of_node_put(parent);
+#endif
+}
+
+
 static void __init of_unittest_parse_interrupts(void)
 {
 	struct device_node *np;
@@ -877,7 +933,7 @@ static int attach_node_and_children(struct device_node *np)
 	of_node_clear_flag(np, OF_DETACHED);
 	raw_spin_unlock_irqrestore(&devtree_lock, flags);
 
-	__of_attach_node_sysfs(np);
+	__of_attach_node_post(np);
 	mutex_unlock(&of_mutex);
 
 	while (child) {
@@ -935,7 +991,7 @@ static int __init unittest_data_add(void)
 	if (!of_root) {
 		of_root = unittest_data_node;
 		for_each_of_allnodes(np)
-			__of_attach_node_sysfs(np);
+			__of_attach_node_post(np);
 		of_aliases = of_find_node_by_path("/aliases");
 		of_chosen = of_find_node_by_path("/chosen");
 		return 0;
@@ -1854,6 +1910,273 @@ static inline void of_unittest_overlay_i2c_15(void) { }
 
 #endif
 
+static void of_unittest_overlay_16(void)
+{
+	int ret;
+	int overlay_nr = 16;
+	int unittest_nr = 16;
+	enum overlay_type ovtype = PDEV_OVERLAY;
+	int before = 0;
+	int after = 1;
+	struct device_node *np = NULL;
+	int id = -1;
+
+	/* unittest device must not be in before state */
+	if (of_unittest_device_exists(unittest_nr, ovtype) != before) {
+		unittest(0, "overlay @\"%s\" with device @\"%s\" %s\n",
+				overlay_path(overlay_nr),
+				unittest_path(unittest_nr, ovtype),
+				!before ? "enabled" : "disabled");
+		return;
+	}
+
+	np = of_find_node_by_path(overlay_path(overlay_nr));
+	if (np == NULL) {
+		unittest(0, "could not find overlay node @\"%s\"\n",
+				overlay_path(overlay_nr));
+		ret = -EINVAL;
+		goto out;
+	}
+
+	/* unittest16 is at index #1 */
+	ret = of_overlay_create_target_index(np, 1);
+	if (ret < 0) {
+		unittest(0, "could not create overlay from \"%s\"\n",
+				overlay_path(overlay_nr));
+		goto out;
+	}
+	id = ret;
+	of_unittest_track_overlay(id);
+
+	ret = 0;
+
+out:
+	of_node_put(np);
+
+	if (ret)
+		return;
+
+	/* unittest device must be to set to after state */
+	if (of_unittest_device_exists(unittest_nr, ovtype) != after) {
+		unittest(0, "overlay @\"%s\" failed to create @\"%s\" %s\n",
+				overlay_path(overlay_nr),
+				unittest_path(unittest_nr, ovtype),
+				!after ? "enabled" : "disabled");
+		return;
+	}
+
+	unittest(1, "overlay test %d passed\n", 16);
+}
+
+static void of_unittest_overlay_17(void)
+{
+	int ret;
+	int overlay_nr = 17;
+	int unittest_nr = 17;
+	enum overlay_type ovtype = PDEV_OVERLAY;
+	int before = 0;
+	int after = 1;
+	const char *root_path;
+	struct device_node *np = NULL, *target_root = NULL;
+	int id = -1;
+
+	/* unittest device must not be in before state */
+	if (of_unittest_device_exists(unittest_nr, ovtype) != before) {
+		unittest(0, "overlay @\"%s\" with device @\"%s\" %s\n",
+				overlay_path(overlay_nr),
+				unittest_path(unittest_nr, ovtype),
+				!before ? "enabled" : "disabled");
+		return;
+	}
+
+	np = of_find_node_by_path(overlay_path(overlay_nr));
+	if (np == NULL) {
+		unittest(0, "could not find overlay node @\"%s\"\n",
+				overlay_path(overlay_nr));
+		ret = -EINVAL;
+		goto out;
+	}
+
+	root_path = "/testcase-data/overlay-node/test-bus/test-unittest17";
+	target_root = of_find_node_by_path(root_path);
+	if (!target_root) {
+		unittest(0, "could not find target_root node @\"%s\"\n",
+				root_path);
+		ret = -EINVAL;
+		goto out;
+	}
+
+	ret = of_overlay_create_target_root(np, target_root);
+	of_node_put(target_root);
+
+	if (ret < 0) {
+		unittest(0, "could not create overlay from \"%s\"\n",
+				overlay_path(overlay_nr));
+		goto out;
+	}
+	id = ret;
+	of_unittest_track_overlay(id);
+
+	ret = 0;
+
+out:
+	of_node_put(np);
+
+	if (ret)
+		return;
+
+	/* unittest device must be to set to after state */
+	if (of_unittest_device_exists(unittest_nr, ovtype) != after) {
+		unittest(0, "overlay @\"%s\" failed to create @\"%s\" %s\n",
+				overlay_path(overlay_nr),
+				unittest_path(unittest_nr, ovtype),
+				!after ? "enabled" : "disabled");
+		return;
+	}
+
+	unittest(1, "overlay test %d passed\n", 17);
+}
+
+static void of_unittest_overlay_18(void)
+{
+	int ret;
+	int overlay_nr = 18;
+	int unittest_nr = 18;
+	enum overlay_type ovtype = PDEV_OVERLAY;
+	int before = 0;
+	int after = 1;
+	const char *root_path;
+	struct device_node *np = NULL, *target_root = NULL;
+	int id = -1;
+
+	/* unittest device must not be in before state */
+	if (of_unittest_device_exists(unittest_nr, ovtype) != before) {
+		unittest(0, "overlay @\"%s\" with device @\"%s\" %s\n",
+				overlay_path(overlay_nr),
+				unittest_path(unittest_nr, ovtype),
+				!before ? "enabled" : "disabled");
+		return;
+	}
+
+	np = of_find_node_by_path(overlay_path(overlay_nr));
+	if (np == NULL) {
+		unittest(0, "could not find overlay node @\"%s\"\n",
+				overlay_path(overlay_nr));
+		ret = -EINVAL;
+		goto out;
+	}
+
+	root_path = "/testcase-data/overlay-node/test-bus/test-unittest18";
+	target_root = of_find_node_by_path(root_path);
+	if (!target_root) {
+		unittest(0, "could not find target_root node @\"%s\"\n",
+				root_path);
+		ret = -EINVAL;
+		goto out;
+	}
+
+	ret = of_overlay_create_target_root(np, target_root);
+	of_node_put(target_root);
+
+	if (ret < 0) {
+		unittest(0, "could not create overlay from \"%s\"\n",
+				overlay_path(overlay_nr));
+		goto out;
+	}
+	id = ret;
+	of_unittest_track_overlay(id);
+
+	ret = 0;
+
+out:
+	of_node_put(np);
+
+	if (ret)
+		return;
+
+	/* unittest device must be to set to after state */
+	if (of_unittest_device_exists(unittest_nr, ovtype) != after) {
+		unittest(0, "overlay @\"%s\" failed to create @\"%s\" %s\n",
+				overlay_path(overlay_nr),
+				unittest_path(unittest_nr, ovtype),
+				!after ? "enabled" : "disabled");
+		return;
+	}
+
+	unittest(1, "overlay test %d passed\n", 18);
+}
+
+static void of_unittest_overlay_19(void)
+{
+	int ret;
+	int overlay_nr = 19;
+	int unittest_nr = 19;
+	enum overlay_type ovtype = PDEV_OVERLAY;
+	int before = 0;
+	int after = 0;
+	const char *root_path;
+	struct device_node *np = NULL, *target_root = NULL;
+	int id = -1;
+
+	/* unittest device must not be in before state */
+	if (of_unittest_device_exists(unittest_nr, ovtype) != before) {
+		unittest(0, "overlay @\"%s\" with device @\"%s\" %s\n",
+				overlay_path(overlay_nr),
+				unittest_path(unittest_nr, ovtype),
+				!before ? "enabled" : "disabled");
+		return;
+	}
+
+	np = of_find_node_by_path(overlay_path(overlay_nr));
+	if (np == NULL) {
+		unittest(0, "could not find overlay node @\"%s\"\n",
+				overlay_path(overlay_nr));
+		ret = -EINVAL;
+		goto out;
+	}
+
+	root_path = "/testcase-data/overlay-node/test-bus/test-unittest19";
+	target_root = of_find_node_by_path(root_path);
+	if (!target_root) {
+		unittest(0, "could not find target_root node @\"%s\"\n",
+				root_path);
+		ret = -EINVAL;
+		goto out;
+	}
+
+	ret = of_overlay_create_target_root(np, target_root);
+	of_node_put(target_root);
+
+	if (ret >= 0) {
+		unittest(0, "created overlay from \"%s\" while we shouldn't\n",
+				overlay_path(overlay_nr));
+		id = ret;
+		of_unittest_track_overlay(id);
+		ret = -EINVAL;
+		goto out;
+	}
+
+	ret = 0;
+
+out:
+	of_node_put(np);
+
+	if (ret)
+		return;
+
+	/* unittest device must be to set to after state */
+	if (of_unittest_device_exists(unittest_nr, ovtype) != after) {
+		unittest(0, "overlay @\"%s\" failed to create @\"%s\" %s\n",
+				overlay_path(overlay_nr),
+				unittest_path(unittest_nr, ovtype),
+				!after ? "enabled" : "disabled");
+		return;
+	}
+
+	unittest(1, "overlay test %d passed\n", 16);
+}
+
+
 static void __init of_unittest_overlay(void)
 {
 	struct device_node *bus_np = NULL;
@@ -1904,6 +2227,12 @@ static void __init of_unittest_overlay(void)
 	of_unittest_overlay_10();
 	of_unittest_overlay_11();
 
+	of_unittest_overlay_16();
+
+	of_unittest_overlay_17();
+	of_unittest_overlay_18();
+	of_unittest_overlay_19();
+
 #if IS_BUILTIN(CONFIG_I2C)
 	if (unittest(of_unittest_overlay_i2c_init() == 0, "i2c init failed\n"))
 		goto out;
@@ -1926,6 +2255,70 @@ out:
 static inline void __init of_unittest_overlay(void) { }
 #endif
 
+#define PHANDLE_LOOKUPS	1000
+
+static void __init of_unittest_phandle_hash(void)
+{
+	struct device_node *node;
+	phandle max_phandle;
+	u32 ph;
+	unsigned long flags;
+	int i, j, total;
+	ktime_t start, end;
+	s64 dur[2];
+	int dec, frac;
+
+	/* test only available when hashing is available */
+	if (!of_phandle_ht_available()) {
+		pr_warn("phandle hash test requires hash to be initialized\n");
+		return;
+	}
+
+	/* find the maximum phandle of the tree */
+	raw_spin_lock_irqsave(&devtree_lock, flags);
+	max_phandle = 0;
+	total = 0;
+	for_each_of_allnodes(node) {
+		if (node->phandle != (phandle)-1U &&
+				node->phandle > max_phandle)
+			max_phandle = node->phandle;
+		total++;
+	}
+	raw_spin_unlock_irqrestore(&devtree_lock, flags);
+	max_phandle++;
+
+	pr_debug("phandle: max-phandle #%u, #%d total nodes\n",
+			(u32)max_phandle, total);
+
+	/* perform random lookups using the hash */
+	for (j = 0; j < 2; j++) {
+
+		/* disabled for pass #0, enabled for pass #1 */
+		of_phandle_ht_is_disabled = j == 0;
+
+		start = ktime_get_raw();
+		for (i = 0; i < PHANDLE_LOOKUPS; i++) {
+			ph = prandom_u32() % max_phandle;
+			node = of_find_node_by_phandle(ph);
+			of_node_put(node);
+		}
+		end = ktime_get_raw();
+
+		dur[j] = ktime_to_us(end) - ktime_to_us(start);
+		pr_debug("#%d lookups in %lld us (%s)\n",
+				PHANDLE_LOOKUPS, dur[j],
+				j == 0 ? "original" : "hashed");
+	}
+
+	unittest(dur[0] > dur[1], "Non hashing phandles are faster!?");
+
+	dec = (int)div64_s64(dur[0] * 10 + 5, dur[1]);
+	frac = dec % 10;
+	dec /= 10;
+	pr_info("the hash method is %d.%d times faster than the original\n",
+			dec, frac);
+}
+
 static int __init of_unittest(void)
 {
 	struct device_node *np;
@@ -1954,11 +2347,13 @@ static int __init of_unittest(void)
 	of_unittest_property_string();
 	of_unittest_property_copy();
 	of_unittest_changeset();
+	of_unittest_changeset_helper();
 	of_unittest_parse_interrupts();
 	of_unittest_parse_interrupts_extended();
 	of_unittest_match_node();
 	of_unittest_platform_populate();
 	of_unittest_overlay();
+	of_unittest_phandle_hash();
 
 	/* Double check linkage after removing testcase data */
 	of_unittest_check_tree_linkage();
diff --git a/drivers/pinctrl/Kconfig b/drivers/pinctrl/Kconfig
index 671610c..f90bed9 100644
--- a/drivers/pinctrl/Kconfig
+++ b/drivers/pinctrl/Kconfig
@@ -180,6 +180,18 @@ config PINCTRL_ST
 	select PINCONF
 	select GPIOLIB_IRQCHIP
 
+config PINCTRL_TI_IODELAY
+	bool "TI IODelay Module pinconf driver"
+	depends on OF
+	select PINCONF
+	select GENERIC_PINCONF
+	select REGMAP_MMIO
+	help
+	  Say Y here to support Texas Instruments' IODelay pinconf driver.
+	  IODelay module is used for the DRA7 SoC family. This driver is in
+	  addition to PINCTRL_SINGLE which controls the mux.
+
+
 config PINCTRL_TZ1090
 	bool "Toumaz Xenif TZ1090 pin control driver"
 	depends on SOC_TZ1090
diff --git a/drivers/pinctrl/Makefile b/drivers/pinctrl/Makefile
index 11bad37..23e118e 100644
--- a/drivers/pinctrl/Makefile
+++ b/drivers/pinctrl/Makefile
@@ -26,6 +26,7 @@ obj-$(CONFIG_PINCTRL_ROCKCHIP)	+= pinctrl-rockchip.o
 obj-$(CONFIG_PINCTRL_SINGLE)	+= pinctrl-single.o
 obj-$(CONFIG_PINCTRL_SIRF)	+= sirf/
 obj-$(CONFIG_ARCH_TEGRA)	+= tegra/
+obj-$(CONFIG_PINCTRL_TI_IODELAY)+= pinctrl-ti-iodelay.o
 obj-$(CONFIG_PINCTRL_TZ1090)	+= pinctrl-tz1090.o
 obj-$(CONFIG_PINCTRL_TZ1090_PDC)	+= pinctrl-tz1090-pdc.o
 obj-$(CONFIG_PINCTRL_U300)	+= pinctrl-u300.o
diff --git b/drivers/pinctrl/pinctrl-ti-iodelay.c b/drivers/pinctrl/pinctrl-ti-iodelay.c
new file mode 100644
index 0000000..8d33414
--- /dev/null
+++ b/drivers/pinctrl/pinctrl-ti-iodelay.c
@@ -0,0 +1,968 @@
+/*
+ * Support for configuration of IO Delay module found on Texas Instruments SoCs
+ * such as DRA7
+ *
+ * Copyright (C) 2015 Texas Instruments, Inc.
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/of.h>
+#include <linux/pinctrl/pinconf-generic.h>
+#include <linux/pinctrl/pinconf.h>
+#include <linux/pinctrl/pinctrl.h>
+#include <linux/regmap.h>
+#include <linux/slab.h>
+
+#define IODELAY_REG_NAME_LEN		((sizeof(u32) * 2) + 3)
+#define DRIVER_NAME			"ti-io-delay"
+/* Should I change this? Abuse? */
+#define IODELAY_MUX_PINS_NAME		"pinctrl-single,pins"
+
+/* Device tree match, populated later */
+static const struct of_device_id ti_iodelay_of_match[];
+
+/**
+ * struct ti_iodelay_conf_vals - Description of each configuration parameters.
+ * @offset:	Configuration register offset
+ * @a_delay:	Agnostic Delay (in ps)
+ * @g_delay:	Gnostic Delay (in ps)
+ */
+struct ti_iodelay_conf_vals {
+	u16 offset;
+	u16 a_delay;
+	u16 g_delay;
+};
+
+/**
+ * struct ti_iodelay_reg_data - Describes the registers for the IOdelay instance
+ * @signature_mask:	Conf reg- mask for the signature bits
+ * @signature_value:	Conf reg- signature value to be written (see TRM)
+ * @lock_mask:		Conf reg- mask for the lock bits
+ * @lock_val:		Conf reg- lock value for the lock bits (see TRM)
+ * @unlock_val:		Conf reg- unlock value for the lock bits (see TRM)
+ * @binary_data_coarse_mask: Conf reg- coarse mask (see TRM)
+ * @binary_data_fine_mask: Conf reg- fine mask (see TRM)
+ * @reg_refclk_offset:	Refclk register offset
+ * @refclk_period_mask:	Refclk mask
+ * @reg_coarse_offset:	Coarse register configuration offset
+ * @coarse_delay_count_mask: Coarse delay count mask
+ * @coarse_ref_count_mask: Coarse ref count mask
+ * @reg_fine_offset:	Fine register configuration offset
+ * @fine_delay_count_mask: Fine delay count mask
+ * @fine_ref_count_mask: Fine  ref count mask
+ * @reg_global_lock_offset: Global(for the IOdelay module) lock register offset
+ * @global_lock_mask:	Lock mask
+ * @global_unlock_val:	unlock value
+ * @global_lock_val:	lock value
+ * @reg_start_offset:	Where does the configuration registers start?
+ * @regmap_config:	Regmap configuration for the IODelay region
+ */
+struct ti_iodelay_reg_data {
+	u32 signature_mask;
+	u32 signature_value;
+	u32 lock_mask;
+	u32 lock_val;
+	u32 unlock_val;
+	u32 binary_data_coarse_mask;
+	u32 binary_data_fine_mask;
+
+	u32 reg_refclk_offset;
+	u32 refclk_period_mask;
+
+	u32 reg_coarse_offset;
+	u32 coarse_delay_count_mask;
+	u32 coarse_ref_count_mask;
+
+	u32 reg_fine_offset;
+	u32 fine_delay_count_mask;
+	u32 fine_ref_count_mask;
+
+	u32 reg_global_lock_offset;
+	u32 global_lock_mask;
+	u32 global_unlock_val;
+	u32 global_lock_val;
+
+	u32 reg_start_offset;
+
+	struct regmap_config *regmap_config;
+};
+
+/**
+ * struct ti_iodelay_reg_values - Computed io_reg configuration values (see TRM)
+ * @coarse_ref_count:	Coarse reference count
+ * @coarse_delay_count:	Coarse delay count
+ * @fine_ref_count:	Fine reference count
+ * @fine_delay_count:	Fine Delay count
+ * @ref_clk_period:	Reference Clock period
+ * @cdpe:		Coarse delay parameter
+ * @fdpe:		Fine delay parameter
+ */
+struct ti_iodelay_reg_values {
+	u16 coarse_ref_count;
+	u16 coarse_delay_count;
+
+	u16 fine_ref_count;
+	u16 fine_delay_count;
+
+	u16 ref_clk_period;
+
+	u32 cdpe;
+	u32 fdpe;
+};
+
+/**
+ * struct ti_iodelay_pin_name - name of the pins
+ * @name: name
+ */
+struct ti_iodelay_pin_name {
+	char name[IODELAY_REG_NAME_LEN];
+};
+
+/**
+ * struct ti_iodelay_pingroup - Structure that describes one group
+ * @np:		Node pointer (device tree)
+ * @name:	Name of the group
+ * @map:	pinctrl map allocated for the group
+ * @vals:	configuration values allocated for the group (from dt)
+ * @nvals:	number of configuration values allocated
+ * @config:	pinconf "Config" - currently a dummy value
+ * @node:	list node to next group
+ */
+struct ti_iodelay_pingroup {
+	struct device_node *np;
+	const char *name;
+	struct pinctrl_map *map;
+	struct ti_iodelay_conf_vals *vals;
+	int nvals;
+	unsigned long config;
+	struct list_head node;
+};
+
+/**
+ * struct ti_iodelay_device - Represents information for a IOdelay instance
+ * @dev: device pointer
+ * @reg_base: Remapped virtual address
+ * @regmap: Regmap for this IOdelay instance
+ * @pctl: Pinctrl device
+ * @desc: pinctrl descriptor for pctl
+ * @pa: pinctrl pin wise description
+ * @names: names of the pins
+ * @groups: list of pinconf groups for iodelay instance
+ * @ngroups: number of groups in the list
+ * @mutex: mutex to protect group list modification
+ * @reg_data: Register definition data for the IODelay instance
+ * @reg_init_conf_values: Initial configuration values.
+ */
+struct ti_iodelay_device {
+	struct device *dev;
+	void __iomem *reg_base;
+	struct regmap *regmap;
+
+	struct pinctrl_dev *pctl;
+	struct pinctrl_desc desc;
+	struct pinctrl_pin_desc *pa;
+	struct ti_iodelay_pin_name *names;
+
+	struct list_head groups;
+	int ngroups;
+	struct mutex mutex; /* list protection */
+
+	const struct ti_iodelay_reg_data *reg_data;
+	struct ti_iodelay_reg_values reg_init_conf_values;
+};
+
+/*--- IOdelay configuration stuff ----*/
+
+/**
+ * ti_iodelay_extract() - extract bits for a field
+ * @val:	register value
+ * @mask:	Mask
+ *
+ * Return: extracted value which is appropriately shifted
+ */
+static inline u32 ti_iodelay_extract(u32 val, u32 mask)
+{
+	return (val & mask) >> __ffs(mask);
+}
+
+/**
+ * ti_iodelay_compute_dpe() - Compute equation for delay parameter
+ * @period:	Period to use
+ * @ref:	Reference Count
+ * @delay:	Delay count
+ * @delay_m:	Delay multiplier
+ *
+ * Return: Computed delay parameter
+ */
+static inline u32 ti_iodelay_compute_dpe(u16 period, u16 ref, u16 delay,
+					 u16 delay_m)
+{
+	u64 m, d;
+
+	/* Handle overflow conditions */
+	m = 10 * (u64)period * (u64)ref;
+	d = 2 * (u64)delay * (u64)delay_m;
+
+	/* Truncate result back to 32 bits */
+	return div64_u64(m, d);
+}
+
+/**
+ * ti_iodelay_pinconf_set() - Configure the pin configuration
+ * @iod:	IODelay device
+ * @val:	Configuration value
+ *
+ * Update the configuration register as per TRM and lockup once done.
+ * *IMPORTANT NOTE* SoC TRM does recommend doing iodelay programmation only
+ * while in Isolation. But, then, isolation also implies that every pin
+ * on the SoC(including DDR) will be isolated out. The only benefit being
+ * a glitchless configuration, However, the intent of this driver is purely
+ * to support a "glitchy" configuration where applicable.
+ *
+ * Return: 0 in case of success, else appropriate error value
+ */
+static int ti_iodelay_pinconf_set(struct ti_iodelay_device *iod,
+				  struct ti_iodelay_conf_vals *val)
+{
+	const struct ti_iodelay_reg_data *reg = iod->reg_data;
+	struct ti_iodelay_reg_values *ival = &iod->reg_init_conf_values;
+	struct device *dev = iod->dev;
+	u32 g_delay_coarse, g_delay_fine;
+	u32 a_delay_coarse, a_delay_fine;
+	u32 c_elements, f_elements;
+	u32 total_delay;
+	u32 reg_mask, reg_val, tmp_val;
+	int r;
+
+	/* NOTE: Truncation is expected in all division below */
+	g_delay_coarse = val->g_delay / 920;
+	g_delay_fine = ((val->g_delay % 920) * 10) / 60;
+
+	a_delay_coarse = val->a_delay / ival->cdpe;
+	a_delay_fine = ((val->a_delay % ival->cdpe) * 10) / ival->fdpe;
+
+	c_elements = g_delay_coarse + a_delay_coarse;
+	f_elements = (g_delay_fine + a_delay_fine) / 10;
+
+	if (f_elements > 22) {
+		total_delay = c_elements * ival->cdpe + f_elements * ival->fdpe;
+		c_elements = total_delay / ival->cdpe;
+		f_elements = (total_delay % ival->cdpe) / ival->fdpe;
+	}
+
+	reg_mask = reg->signature_mask;
+	reg_val = reg->signature_value << __ffs(reg->signature_mask);
+
+	reg_mask |= reg->binary_data_coarse_mask;
+	tmp_val = c_elements << __ffs(reg->binary_data_coarse_mask);
+	if (tmp_val & ~reg->binary_data_coarse_mask) {
+		dev_err(dev, "Masking overflow of coarse elements %08x\n",
+			tmp_val);
+		tmp_val &= reg->binary_data_coarse_mask;
+	}
+	reg_val |= tmp_val;
+
+	reg_mask |= reg->binary_data_fine_mask;
+	tmp_val = f_elements << __ffs(reg->binary_data_fine_mask);
+	if (tmp_val & ~reg->binary_data_fine_mask) {
+		dev_err(dev, "Masking overflow of fine elements %08x\n",
+			tmp_val);
+		tmp_val &= reg->binary_data_fine_mask;
+	}
+	reg_val |= tmp_val;
+
+	/*
+	 * NOTE: we leave the iodelay values unlocked - this is to work around
+	 * situations such as those found with mmc mode change.
+	 * However, this leaves open any unwarranted changes to padconf register
+	 * impacting iodelay configuration. Use with care!
+	 */
+	reg_mask |= reg->lock_mask;
+	reg_val |= reg->unlock_val << __ffs(reg->lock_mask);
+	r = regmap_update_bits(iod->regmap, val->offset, reg_mask, reg_val);
+
+	dev_dbg(dev, "Set reg 0x%x Delay(a=%d g=%d), Elements(C=%d F=%d)0x%x\n",
+		val->offset, val->a_delay, val->g_delay, c_elements,
+		f_elements, reg_val);
+
+	return r;
+}
+
+/**
+ * ti_iodelay_pinconf_init_dev() - Initialize IODelay device
+ * @iod:	IODelay device
+ *
+ * Unlocks the IODelay region, computes the common parameters
+ *
+ * Return: 0 in case of success, else appropriate error value
+ */
+static int ti_iodelay_pinconf_init_dev(struct ti_iodelay_device *iod)
+{
+	const struct ti_iodelay_reg_data *reg = iod->reg_data;
+	struct device *dev = iod->dev;
+	struct ti_iodelay_reg_values *ival = &iod->reg_init_conf_values;
+	u32 val;
+	int r;
+
+	/* unlock the IOdelay region */
+	r = regmap_update_bits(iod->regmap, reg->reg_global_lock_offset,
+			       reg->global_lock_mask, reg->global_unlock_val);
+	if (r)
+		return r;
+
+	/* Read up Recalibration sequence done by bootloader */
+	r = regmap_read(iod->regmap, reg->reg_refclk_offset, &val);
+	if (r)
+		return r;
+	ival->ref_clk_period = ti_iodelay_extract(val, reg->refclk_period_mask);
+	dev_dbg(dev, "refclk_period=0x%04x\n", ival->ref_clk_period);
+
+	r = regmap_read(iod->regmap, reg->reg_coarse_offset, &val);
+	if (r)
+		return r;
+	ival->coarse_ref_count =
+	    ti_iodelay_extract(val, reg->coarse_ref_count_mask);
+	ival->coarse_delay_count =
+	    ti_iodelay_extract(val, reg->coarse_delay_count_mask);
+	if (!ival->coarse_delay_count) {
+		dev_err(dev, "Invalid Coarse delay count (0) (reg=0x%08x)\n",
+			val);
+		return -EINVAL;
+	}
+	ival->cdpe = ti_iodelay_compute_dpe(ival->ref_clk_period,
+					    ival->coarse_ref_count,
+					    ival->coarse_delay_count, 88);
+	if (!ival->cdpe) {
+		dev_err(dev, "Invalid cdpe computed params = %d %d %d\n",
+			ival->ref_clk_period, ival->coarse_ref_count,
+			ival->coarse_delay_count);
+		return -EINVAL;
+	}
+	dev_dbg(iod->dev, "coarse: ref=0x%04x delay=0x%04x cdpe=0x%08x\n",
+		ival->coarse_ref_count, ival->coarse_delay_count, ival->cdpe);
+
+	r = regmap_read(iod->regmap, reg->reg_fine_offset, &val);
+	if (r)
+		return r;
+	ival->fine_ref_count =
+	    ti_iodelay_extract(val, reg->fine_ref_count_mask);
+	ival->fine_delay_count =
+	    ti_iodelay_extract(val, reg->fine_delay_count_mask);
+	if (!ival->fine_delay_count) {
+		dev_err(dev, "Invalid Fine delay count (0) (reg=0x%08x)\n",
+			val);
+		return -EINVAL;
+	}
+	ival->fdpe = ti_iodelay_compute_dpe(ival->ref_clk_period,
+					    ival->fine_ref_count,
+					    ival->fine_delay_count, 264);
+	if (!ival->fdpe) {
+		dev_err(dev, "Invalid fdpe(0) computed params = %d %d %d\n",
+			ival->ref_clk_period, ival->fine_ref_count,
+			ival->fine_delay_count);
+		return -EINVAL;
+	}
+	dev_dbg(iod->dev, "fine: ref=0x%04x delay=0x%04x fdpe=0x%08x\n",
+		ival->fine_ref_count, ival->fine_delay_count, ival->fdpe);
+
+	return 0;
+}
+
+/**
+ * ti_iodelay_pinconf_deinit_dev() - deinit the IOdelay device
+ * @iod:	IODelay device
+ *
+ * Deinitialize the IODelay device (basically just lock the region back up.
+ */
+static void ti_iodelay_pinconf_deinit_dev(struct ti_iodelay_device *iod)
+{
+	const struct ti_iodelay_reg_data *reg = iod->reg_data;
+
+	/* lock the IOdelay region back again */
+	regmap_update_bits(iod->regmap, reg->reg_global_lock_offset,
+			   reg->global_lock_mask, reg->global_lock_val);
+}
+
+/*--- Pinctrl/pinconf framework stuff ----*/
+
+/**
+ * ti_iodelay_get_group() - Find the group mapped by a group selector
+ * @iod:	IODelay device
+ * @gselector:	Group Selector
+ *
+ * Return: Corresponding group representing group selector in list of groups
+ * managed in IOdelay device OR NULL if not found.
+ */
+static struct ti_iodelay_pingroup *ti_iodelay_get_group(struct ti_iodelay_device
+							*iod,
+							unsigned gselector)
+{
+	struct ti_iodelay_pingroup *group;
+	int gid = 0;
+
+	list_for_each_entry(group, &iod->groups, node) {
+		if (gid == gselector)
+			return group;
+		gid++;
+	}
+
+	dev_err(iod->dev, "%s could not find pingroup %i\n", __func__,
+		gselector);
+	return NULL;
+}
+
+/**
+ * ti_iodelay_dt_node_to_map() - Map a device tree node to appropriate group
+ * @pctldev:	pinctrl device representing IODelay device
+ * @np:		Node Pointer (device tree)
+ * @map:	Pinctrl Map returned back to pinctrl framework
+ * @num_maps:	Number of maps (1)
+ *
+ * Maps the device tree description into a group of configuration parameters
+ * for IOdelay block entry.
+ *
+ * Return: 0 in case of success, else appropriate error value
+ */
+static int ti_iodelay_dt_node_to_map(struct pinctrl_dev *pctldev,
+				     struct device_node *np,
+				     struct pinctrl_map **map,
+				     unsigned *num_maps)
+{
+	struct ti_iodelay_device *iod;
+	struct device *dev;
+	/* const char **pgnames; */
+	int ret = 0;
+	const __be32 *mux;
+	struct ti_iodelay_conf_vals *vals;
+	struct ti_iodelay_pingroup *group;
+	int size, index, idx, rows;
+	u32 offset, val;
+
+	iod = pinctrl_dev_get_drvdata(pctldev);
+	if (!iod)
+		return -EINVAL;
+	dev = iod->dev;
+
+	*map = devm_kzalloc(dev, sizeof(**map), GFP_KERNEL);
+	if (!*map)
+		return -ENOMEM;
+	*num_maps = 0;
+
+	group = devm_kzalloc(dev, sizeof(*group), GFP_KERNEL);
+	if (!group) {
+		ret = -ENOMEM;
+		goto free_map;
+	}
+
+	mux = of_get_property(np, IODELAY_MUX_PINS_NAME, &size);
+	if ((!mux) || (size < sizeof(*mux) * 2)) {
+		dev_err(dev, "bad data for mux %s\n", np->name);
+		ret = -EINVAL;
+		goto free_group;
+	}
+
+	size /= sizeof(*mux);	/* Number of elements in array */
+	rows = size / 2;
+
+	vals = devm_kzalloc(dev, sizeof(*vals) * rows, GFP_KERNEL);
+	if (!vals) {
+		ret = -ENOMEM;
+		goto free_group;
+	}
+
+	index = 0;
+	idx = 0;
+	while (index < size) {
+		offset = be32_to_cpup(mux + index++);
+		val = be32_to_cpup(mux + index++);
+		vals[idx].offset = offset;
+		vals[idx].a_delay = val & 0xFFFF;
+		vals[idx].g_delay = (val & 0xFFFF0000) >> 16;
+		if (offset > iod->reg_data->regmap_config->max_register) {
+			dev_err(dev, "Invalid offset for %s 0x%x\n",
+				np->name, offset);
+			break;
+		}
+		dev_dbg(dev, "%s offset=%x a_delay = %d g_delay = %d\n",
+			np->name, vals[idx].offset, vals[idx].a_delay,
+			vals[idx].g_delay);
+		idx++;
+	}
+
+	group->name = np->name;
+	group->np = np;
+	group->vals = vals;
+	group->nvals = idx;
+	group->config = PIN_CONFIG_END;
+	group->map = *map;
+
+	/* Add to group list */
+	mutex_lock(&iod->mutex);
+	list_add_tail(&group->node, &iod->groups);
+	iod->ngroups++;
+	mutex_unlock(&iod->mutex);
+
+	(*map)->type = PIN_MAP_TYPE_CONFIGS_GROUP;
+	(*map)->data.configs.group_or_pin = np->name;
+	(*map)->data.configs.configs = &group->config;
+	(*map)->data.configs.num_configs = 1;
+	*num_maps = 1;
+
+	return 0;
+
+free_group:
+	devm_kfree(dev, group);
+free_map:
+	devm_kfree(dev, *map);
+	return ret;
+}
+
+/**
+ * ti_iodelay_dt_free_map() - Free map and resource alloted as per the map
+ * @pctldev:	pinctrl device representing IODelay device
+ * @map:	Map allocated by ti_iodelay_dt_node_to_map
+ * @num_maps:	Num maps (1)
+ *
+ * Removes the group associated with the map and frees all resources allocated
+ * for the group.
+ */
+static void ti_iodelay_dt_free_map(struct pinctrl_dev *pctldev,
+				   struct pinctrl_map *map, unsigned num_maps)
+{
+	struct ti_iodelay_device *iod;
+	struct device *dev;
+	struct ti_iodelay_pingroup *group;
+	bool found = false;
+
+	if (!map)
+		return;
+
+	iod = pinctrl_dev_get_drvdata(pctldev);
+	if (!iod)
+		return;
+	dev = iod->dev;
+
+	mutex_lock(&iod->mutex);
+	list_for_each_entry(group, &iod->groups, node) {
+		if (group->map == map) {
+			found = true;
+			list_del(&group->node);
+			iod->ngroups--;
+			break;
+		}
+	}
+	mutex_unlock(&iod->mutex);
+
+	/* If some freaky pinconf framework bug... */
+	if (!found)
+		return;
+
+	devm_kfree(dev, group->vals);
+	devm_kfree(dev, group);
+	devm_kfree(dev, map);
+}
+
+/**
+ * ti_iodelay_pinctrl_get_groups_count() - Get number of groups registered
+ * @pctldev:	pinctrl device representing IODelay device
+ *
+ * Return: number of groups mapped on the IODelay
+ */
+static int ti_iodelay_pinctrl_get_groups_count(struct pinctrl_dev *pctldev)
+{
+	struct ti_iodelay_device *iod;
+	struct device *dev;
+
+	iod = pinctrl_dev_get_drvdata(pctldev);
+	dev = iod->dev;
+
+	return iod->ngroups;
+}
+
+/**
+ * ti_iodelay_pinctrl_get_group_name() - Get the group name
+ * @pctldev:	pinctrl device representing IODelay device
+ * @gselector:	group selector
+ *
+ * Return: name of the Group given a valid gselector, else NULL.
+ */
+static const char *ti_iodelay_pinctrl_get_group_name(struct pinctrl_dev
+						     *pctldev,
+						     unsigned gselector)
+{
+	struct ti_iodelay_device *iod;
+	struct device *dev;
+	struct ti_iodelay_pingroup *group;
+
+	iod = pinctrl_dev_get_drvdata(pctldev);
+	dev = iod->dev;
+
+	group = ti_iodelay_get_group(iod, gselector);
+	if (!group)
+		return NULL;
+
+	return group->name;
+}
+
+/**
+ * ti_iodelay_pinctrl_get_group_pins() - get group pins
+ * @pctldev:	pinctrl device representing IODelay device
+ * @gselector: Group selector
+ * @pins:	pointer to the pins
+ * @npins:	number of pins
+ *
+ * Dummy implementation since we do not track pins, we track configurations
+ * Forced by pinctrl's pinctrl_check_ops()
+ *
+ * Return: -EINVAL
+ */
+static int ti_iodelay_pinctrl_get_group_pins(struct pinctrl_dev *pctldev,
+					     unsigned gselector,
+					     const unsigned **pins,
+					     unsigned *npins)
+{
+	/* Dummy implementation - we dont do pin mux */
+	return -EINVAL;
+}
+
+/**
+ * ti_iodelay_pinconf_group_get() - Get the group configuration
+ * @pctldev:	pinctrl device representing IODelay device
+ * @gselector:	Group selector
+ * @config:	configuration returned
+ *
+ * Return: The configuration if the group is valid, else returns -EINVAL
+ */
+static int ti_iodelay_pinconf_group_get(struct pinctrl_dev *pctldev,
+					unsigned gselector,
+					unsigned long *config)
+{
+	struct ti_iodelay_device *iod;
+	struct device *dev;
+	struct ti_iodelay_pingroup *group;
+
+	iod = pinctrl_dev_get_drvdata(pctldev);
+	dev = iod->dev;
+	group = ti_iodelay_get_group(iod, gselector);
+
+	if (!group)
+		return -EINVAL;
+
+	*config = group->config;
+	return 0;
+}
+
+/**
+ * ti_iodelay_pinconf_group_set() - Configure the groups of pins
+ * @pctldev:	pinctrl device representing IODelay device
+ * @gselector:	Group selector
+ * @configs:	Configurations
+ * @num_configs: Number of configurations
+ *
+ * Return: 0 if all went fine, else appropriate error value.
+ */
+static int ti_iodelay_pinconf_group_set(struct pinctrl_dev *pctldev,
+					unsigned gselector,
+					unsigned long *configs,
+					unsigned num_configs)
+{
+	struct ti_iodelay_device *iod;
+	struct device *dev;
+	struct ti_iodelay_pingroup *group;
+	int i;
+
+	iod = pinctrl_dev_get_drvdata(pctldev);
+	dev = iod->dev;
+	group = ti_iodelay_get_group(iod, gselector);
+
+	if (num_configs != 1) {
+		dev_err(dev, "Unsupported number of configurations %d\n",
+			num_configs);
+		return -EINVAL;
+	}
+
+	if (*configs != PIN_CONFIG_END) {
+		dev_err(dev, "Unsupported configuration\n");
+		return -EINVAL;
+	}
+
+	for (i = 0; i < group->nvals; i++) {
+		if (ti_iodelay_pinconf_set(iod, &group->vals[i]))
+			return -ENOTSUPP;
+	}
+
+	return 0;
+}
+
+#ifdef CONFIG_DEBUG_FS
+/**
+ * ti_iodelay_pinconf_group_dbg_show() - show the group information
+ * @pctldev:	Show the group information
+ * @s:	Sequence file
+ * @gselector:	group selector
+ *
+ * Provide the configuration information of the selected group
+ */
+static void ti_iodelay_pinconf_group_dbg_show(struct pinctrl_dev *pctldev,
+					      struct seq_file *s,
+					      unsigned gselector)
+{
+	struct ti_iodelay_device *iod;
+	struct device *dev;
+	struct ti_iodelay_pingroup *group;
+	int i;
+
+	iod = pinctrl_dev_get_drvdata(pctldev);
+	dev = iod->dev;
+	group = ti_iodelay_get_group(iod, gselector);
+	if (!group)
+		return;
+
+	for (i = 0; i < group->nvals; i++) {
+		struct ti_iodelay_conf_vals *val;
+		u32 reg = 0;
+
+		val = &group->vals[i];
+		regmap_read(iod->regmap, val->offset, &reg),
+		    seq_printf(s, "\n\t0x%08x = 0x%08x (%3d, %3d)",
+			       val->offset, reg, val->a_delay, val->g_delay);
+	}
+}
+#endif
+
+static struct pinctrl_ops ti_iodelay_pinctrl_ops = {
+	.dt_node_to_map = ti_iodelay_dt_node_to_map,
+	.dt_free_map = ti_iodelay_dt_free_map,
+	.get_groups_count = ti_iodelay_pinctrl_get_groups_count,
+	.get_group_name = ti_iodelay_pinctrl_get_group_name,
+	.get_group_pins = ti_iodelay_pinctrl_get_group_pins,
+};
+
+static struct pinconf_ops ti_iodelay_pinctrl_pinconf_ops = {
+	.pin_config_group_get = ti_iodelay_pinconf_group_get,
+	.pin_config_group_set = ti_iodelay_pinconf_group_set,
+#ifdef CONFIG_DEBUG_FS
+	.pin_config_group_dbg_show = ti_iodelay_pinconf_group_dbg_show,
+#endif
+};
+
+/**
+ * ti_iodelay_alloc_pins() - Allocate structures needed for pins for IOdelay
+ * @dev:	device pointer
+ * @iod:	IODelay device
+ * @base_phy:	Base Physical Address
+ *
+ * Return: 0 if all went fine, else appropriate error value.
+ */
+static int ti_iodelay_alloc_pins(struct device *dev,
+				 struct ti_iodelay_device *iod, u32 base_phy)
+{
+	const struct ti_iodelay_reg_data *r = iod->reg_data;
+	struct pinctrl_pin_desc *pin;
+	struct ti_iodelay_pin_name *pn;
+	u32 phy_reg;
+	int nr_pins, i;
+
+	nr_pins = (r->regmap_config->max_register - r->reg_start_offset) / 4;
+
+	dev_dbg(dev, "Allocating %i pins\n", nr_pins);
+
+	iod->pa = devm_kzalloc(dev, sizeof(*iod->pa) * nr_pins, GFP_KERNEL);
+	if (!iod->pa)
+		return -ENOMEM;
+
+	iod->names =
+	    devm_kzalloc(dev, sizeof(struct ti_iodelay_pin_name) * nr_pins,
+			 GFP_KERNEL);
+	if (!iod->names)
+		return -ENOMEM;
+
+	iod->desc.pins = iod->pa;
+	iod->desc.npins = nr_pins;
+
+	phy_reg = r->reg_start_offset + base_phy;
+	pn = iod->names;
+	for (i = 0; i < nr_pins; i++, pn++, phy_reg += 4) {
+		pin = &iod->pa[i];
+		sprintf(pn->name, "%x.%d", phy_reg, i);
+		pin->number = i;
+		pin->name = pn->name;
+	}
+
+	return 0;
+}
+
+/**
+ * ti_iodelay_probe() - Standard probe
+ * @pdev:	platform device
+ *
+ * Return: 0 if all went fine, else appropriate error value.
+ */
+static int ti_iodelay_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct device_node *np = of_node_get(dev->of_node);
+	const struct of_device_id *match;
+	struct resource *res;
+	struct ti_iodelay_device *iod;
+	int ret = 0;
+
+	if (!np) {
+		ret = -EINVAL;
+		dev_err(dev, "No OF node\n");
+		goto exit_out;
+	}
+
+	match = of_match_device(ti_iodelay_of_match, dev);
+	if (!match) {
+		ret = -EINVAL;
+		dev_err(dev, "No DATA match\n");
+		goto exit_out;
+	}
+
+	iod = devm_kzalloc(dev, sizeof(*iod), GFP_KERNEL);
+	if (!iod) {
+		ret = -ENOMEM;
+		goto exit_out;
+	}
+	iod->dev = dev;
+	iod->reg_data = match->data;
+
+	/* So far We can assume there is only 1 bank of registers */
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res) {
+		dev_err(dev, "Missing MEM resource\n");
+		ret = -ENODEV;
+		goto exit_out;
+	}
+
+	iod->reg_base = devm_ioremap_resource(dev, res);
+	if (IS_ERR(iod->reg_base)) {
+		ret = PTR_ERR(iod->reg_base);
+		goto exit_out;
+	}
+
+	iod->regmap = devm_regmap_init_mmio(dev, iod->reg_base,
+					    iod->reg_data->regmap_config);
+	if (IS_ERR(iod->regmap)) {
+		dev_err(dev, "Regmap MMIO init failed.\n");
+		ret = PTR_ERR(iod->regmap);
+		goto exit_out;
+	}
+
+	if (ti_iodelay_pinconf_init_dev(iod))
+		goto exit_out;
+
+	ret = ti_iodelay_alloc_pins(dev, iod, res->start);
+	if (ret)
+		goto exit_out;
+
+	INIT_LIST_HEAD(&iod->groups);
+	mutex_init(&iod->mutex);
+
+	iod->desc.pctlops = &ti_iodelay_pinctrl_ops;
+	/* no pinmux ops - we are pinconf */
+	iod->desc.confops = &ti_iodelay_pinctrl_pinconf_ops;
+	iod->desc.name = dev_name(dev);
+	iod->desc.owner = THIS_MODULE;
+
+	iod->pctl = pinctrl_register(&iod->desc, dev, iod);
+	if (!iod->pctl) {
+		dev_err(dev, "Failed to register pinctrl\n");
+		ret = -ENODEV;
+		goto exit_out;
+	}
+
+	platform_set_drvdata(pdev, iod);
+
+exit_out:
+	of_node_put(np);
+	return ret;
+}
+
+/**
+ * ti_iodelay_remove() - standard remove
+ * @pdev:	platform device
+ *
+ * Return: 0 if all went fine, else appropriate error value.
+ */
+static int ti_iodelay_remove(struct platform_device *pdev)
+{
+	struct ti_iodelay_device *iod = platform_get_drvdata(pdev);
+
+	if (!iod)
+		return 0;
+	if (iod->pctl)
+		pinctrl_unregister(iod->pctl);
+
+	ti_iodelay_pinconf_deinit_dev(iod);
+
+	/* Expect other allocations to be freed by devm */
+
+	return 0;
+}
+
+static struct regmap_config dra7_iodelay_regmap_config = {
+	.reg_bits = 32,
+	.reg_stride = 4,
+	.val_bits = 32,
+	.max_register = 0xD1C,
+};
+
+static struct ti_iodelay_reg_data dra7_iodelay_data = {
+	.signature_mask = 0x0003F000,
+	.signature_value = 0x29,
+	.lock_mask = 0x00000400,
+	.lock_val = 1,
+	.unlock_val = 0,
+	.binary_data_coarse_mask = 0x000003E0,
+	.binary_data_fine_mask = 0x0000001F,
+
+	.reg_refclk_offset = 0x14,
+	.refclk_period_mask = 0xFFFF,
+
+	.reg_coarse_offset = 0x18,
+	.coarse_delay_count_mask = 0xFFFF0000,
+	.coarse_ref_count_mask = 0x0000FFFF,
+
+	.reg_fine_offset = 0x1C,
+	.fine_delay_count_mask = 0xFFFF0000,
+	.fine_ref_count_mask = 0x0000FFFF,
+
+	.reg_global_lock_offset = 0x2C,
+	.global_lock_mask = 0x0000FFFF,
+	.global_unlock_val = 0x0000AAAA,
+	.global_lock_val = 0x0000AAAB,
+
+	.reg_start_offset = 0x30,
+	.regmap_config = &dra7_iodelay_regmap_config,
+};
+
+static const struct of_device_id ti_iodelay_of_match[] = {
+	{.compatible = "ti,dra7-iodelay", .data = &dra7_iodelay_data},
+	{ /* Hopefully no more.. */ },
+};
+MODULE_DEVICE_TABLE(of, ti_iodelay_of_match);
+
+static struct platform_driver ti_iodelay_driver = {
+	.probe = ti_iodelay_probe,
+	.remove = ti_iodelay_remove,
+	.driver = {
+		   .owner = THIS_MODULE,
+		   .name = DRIVER_NAME,
+		   .of_match_table = ti_iodelay_of_match,
+		   },
+};
+module_platform_driver(ti_iodelay_driver);
+
+MODULE_AUTHOR("Texas Instruments, Inc.");
+MODULE_DESCRIPTION("Pinconf driver for TI's IO Delay module");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/power/Kconfig b/drivers/power/Kconfig
index 63454b5..c1bb046 100644
--- a/drivers/power/Kconfig
+++ b/drivers/power/Kconfig
@@ -1,3 +1,4 @@
 source "drivers/power/avs/Kconfig"
 source "drivers/power/reset/Kconfig"
 source "drivers/power/supply/Kconfig"
+source "drivers/power/pwrseq/Kconfig"
diff --git a/drivers/power/Makefile b/drivers/power/Makefile
index ff35c71..7db8035 100644
--- a/drivers/power/Makefile
+++ b/drivers/power/Makefile
@@ -1,3 +1,4 @@
 obj-$(CONFIG_POWER_AVS)		+= avs/
 obj-$(CONFIG_POWER_RESET)	+= reset/
 obj-$(CONFIG_POWER_SUPPLY)	+= supply/
+obj-$(CONFIG_POWER_SEQUENCE)	+= pwrseq/
diff --git b/drivers/power/pwrseq/Kconfig b/drivers/power/pwrseq/Kconfig
new file mode 100644
index 0000000..3859a67
--- /dev/null
+++ b/drivers/power/pwrseq/Kconfig
@@ -0,0 +1,19 @@
+#
+# Power Sequence library
+#
+
+config POWER_SEQUENCE
+	bool
+
+menu "Power Sequence Support"
+
+config PWRSEQ_GENERIC
+	bool "Generic power sequence control"
+	depends on OF
+	select POWER_SEQUENCE
+	help
+	   It is used for drivers which needs to do power sequence
+	   (eg, turn on clock, toggle reset gpio) before the related
+	   devices can be found by hardware. This generic one can be
+	   used for common power sequence control.
+endmenu
diff --git b/drivers/power/pwrseq/Makefile b/drivers/power/pwrseq/Makefile
new file mode 100644
index 0000000..ad82389
--- /dev/null
+++ b/drivers/power/pwrseq/Makefile
@@ -0,0 +1,2 @@
+obj-$(CONFIG_POWER_SEQUENCE) += core.o
+obj-$(CONFIG_PWRSEQ_GENERIC) += pwrseq_generic.o
diff --git b/drivers/power/pwrseq/core.c b/drivers/power/pwrseq/core.c
new file mode 100644
index 0000000..9cb1223
--- /dev/null
+++ b/drivers/power/pwrseq/core.c
@@ -0,0 +1,191 @@
+/*
+ * core.c	power sequence core file
+ *
+ * Copyright (C) 2016 Freescale Semiconductor, Inc.
+ * Author: Peter Chen <peter.chen@nxp.com>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2  of
+ * the License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/of.h>
+#include <linux/slab.h>
+#include <linux/power/pwrseq.h>
+
+static DEFINE_MUTEX(pwrseq_list_mutex);
+static LIST_HEAD(pwrseq_list);
+
+int pwrseq_get(struct device_node *np, struct pwrseq *p)
+{
+	if (p && p->get)
+		return p->get(np, p);
+
+	return -ENOTSUPP;
+}
+EXPORT_SYMBOL_GPL(pwrseq_get);
+
+int pwrseq_on(struct pwrseq *p)
+{
+	if (p && p->on)
+		return p->on(p);
+
+	return -ENOTSUPP;
+}
+EXPORT_SYMBOL_GPL(pwrseq_on);
+
+void pwrseq_off(struct pwrseq *p)
+{
+	if (p && p->off)
+		p->off(p);
+}
+EXPORT_SYMBOL_GPL(pwrseq_off);
+
+void pwrseq_put(struct pwrseq *p)
+{
+	if (p && p->put)
+		p->put(p);
+}
+EXPORT_SYMBOL_GPL(pwrseq_put);
+
+int pwrseq_suspend(struct pwrseq *p)
+{
+	if (p && p->suspend)
+		return p->suspend(p);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(pwrseq_suspend);
+
+int pwrseq_resume(struct pwrseq *p)
+{
+	if (p && p->resume)
+		return p->resume(p);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(pwrseq_resume);
+
+void pwrseq_register(struct pwrseq *pwrseq)
+{
+	mutex_lock(&pwrseq_list_mutex);
+	list_add(&pwrseq->node, &pwrseq_list);
+	mutex_unlock(&pwrseq_list_mutex);
+}
+EXPORT_SYMBOL_GPL(pwrseq_register);
+
+void pwrseq_unregister(struct pwrseq *pwrseq)
+{
+	mutex_lock(&pwrseq_list_mutex);
+	list_del(&pwrseq->node);
+	mutex_unlock(&pwrseq_list_mutex);
+}
+EXPORT_SYMBOL_GPL(pwrseq_unregister);
+
+static struct pwrseq *pwrseq_find_available_instance(struct device_node *np)
+{
+	struct pwrseq *pwrseq;
+
+	list_for_each_entry(pwrseq, &pwrseq_list, node) {
+		if (pwrseq->used)
+			continue;
+
+		/* compare compatible string for pwrseq node */
+		if (of_match_node(pwrseq->pwrseq_of_match_table, np)) {
+			pwrseq->used = true;
+			return pwrseq;
+		}
+
+		/* return generic pwrseq instance */
+		if (!strcmp(pwrseq->pwrseq_of_match_table->compatible,
+				"generic")) {
+			pr_debug("using generic pwrseq instance for %s\n",
+				np->full_name);
+			pwrseq->used = true;
+			return pwrseq;
+		}
+	}
+	pr_warn("Can't find any pwrseq instances for %s\n", np->full_name);
+
+	return NULL;
+}
+
+struct pwrseq *of_pwrseq_on(struct device_node *np)
+{
+	struct pwrseq *pwrseq;
+	int ret;
+
+	pwrseq = pwrseq_find_available_instance(np);
+	if (!pwrseq)
+		return ERR_PTR(-ENONET);
+
+	ret = pwrseq_get(np, pwrseq);
+	if (ret) {
+		/* Mark current pwrseq as unused */
+		pwrseq->used = false;
+		return ERR_PTR(ret);
+	}
+
+	ret = pwrseq_on(pwrseq);
+	if (ret)
+		goto pwr_put;
+
+	return pwrseq;
+
+pwr_put:
+	pwrseq_put(pwrseq);
+	return ERR_PTR(ret);
+}
+EXPORT_SYMBOL_GPL(of_pwrseq_on);
+
+void of_pwrseq_off(struct pwrseq *pwrseq)
+{
+	pwrseq_off(pwrseq);
+	pwrseq_put(pwrseq);
+}
+EXPORT_SYMBOL_GPL(of_pwrseq_off);
+
+int of_pwrseq_on_list(struct device_node *np, struct list_head *head)
+{
+	struct pwrseq *pwrseq;
+	struct pwrseq_list_per_dev *pwrseq_list_node;
+
+	pwrseq = of_pwrseq_on(np);
+	if (IS_ERR(pwrseq))
+		return PTR_ERR(pwrseq);
+
+	pwrseq_list_node = kzalloc(sizeof(*pwrseq_list_node), GFP_KERNEL);
+	if (!pwrseq_list_node) {
+		of_pwrseq_off(pwrseq);
+		return -ENOMEM;
+	}
+	pwrseq_list_node->pwrseq = pwrseq;
+	list_add(&pwrseq_list_node->list, head);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(of_pwrseq_on_list);
+
+void of_pwrseq_off_list(struct list_head *head)
+{
+	struct pwrseq *pwrseq;
+	struct pwrseq_list_per_dev *pwrseq_list_node, *tmp_node;
+
+	list_for_each_entry_safe(pwrseq_list_node, tmp_node, head, list) {
+		pwrseq = pwrseq_list_node->pwrseq;
+		of_pwrseq_off(pwrseq);
+		list_del(&pwrseq_list_node->list);
+		kfree(pwrseq_list_node);
+	}
+}
+EXPORT_SYMBOL_GPL(of_pwrseq_off_list);
diff --git b/drivers/power/pwrseq/pwrseq_generic.c b/drivers/power/pwrseq/pwrseq_generic.c
new file mode 100644
index 0000000..d7a77f2
--- /dev/null
+++ b/drivers/power/pwrseq/pwrseq_generic.c
@@ -0,0 +1,183 @@
+/*
+ * pwrseq_generic.c	Generic power sequence handling
+ *
+ * Copyright (C) 2016 Freescale Semiconductor, Inc.
+ * Author: Peter Chen <peter.chen@nxp.com>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2  of
+ * the License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/gpio.h>
+#include <linux/gpio/consumer.h>
+#include <linux/of.h>
+#include <linux/of_gpio.h>
+#include <linux/slab.h>
+
+#include <linux/power/pwrseq.h>
+
+struct pwrseq_generic {
+	struct pwrseq pwrseq;
+	struct gpio_desc *gpiod_reset;
+	struct clk *clks[PWRSEQ_MAX_CLKS];
+	u32 duration_us;
+};
+
+#define to_generic_pwrseq(p) container_of(p, struct pwrseq_generic, pwrseq)
+
+static int pwrseq_generic_alloc_instance(void);
+static const struct of_device_id generic_id_table[] = {
+	{ .compatible = "generic",},
+	{ /* sentinel */ }
+};
+
+static void pwrseq_generic_put(struct pwrseq *pwrseq)
+{
+	struct pwrseq_generic *pwrseq_gen = to_generic_pwrseq(pwrseq);
+	int clk;
+
+	if (pwrseq_gen->gpiod_reset)
+		gpiod_put(pwrseq_gen->gpiod_reset);
+
+	for (clk = 0; clk < PWRSEQ_MAX_CLKS; clk++)
+		clk_put(pwrseq_gen->clks[clk]);
+
+	pwrseq_unregister(&pwrseq_gen->pwrseq);
+	kfree(pwrseq_gen);
+}
+
+static void pwrseq_generic_off(struct pwrseq *pwrseq)
+{
+	struct pwrseq_generic *pwrseq_gen = to_generic_pwrseq(pwrseq);
+	int clk;
+
+	for (clk = PWRSEQ_MAX_CLKS - 1; clk >= 0; clk--)
+		clk_disable_unprepare(pwrseq_gen->clks[clk]);
+}
+
+static int pwrseq_generic_on(struct pwrseq *pwrseq)
+{
+	struct pwrseq_generic *pwrseq_gen = to_generic_pwrseq(pwrseq);
+	int clk, ret = 0;
+	struct gpio_desc *gpiod_reset = pwrseq_gen->gpiod_reset;
+
+	for (clk = 0; clk < PWRSEQ_MAX_CLKS && pwrseq_gen->clks[clk]; clk++) {
+		ret = clk_prepare_enable(pwrseq_gen->clks[clk]);
+		if (ret) {
+			pr_err("Can't enable clock, ret=%d\n", ret);
+			goto err_disable_clks;
+		}
+	}
+
+	if (gpiod_reset) {
+		u32 duration_us = pwrseq_gen->duration_us;
+
+		if (duration_us <= 10)
+			udelay(10);
+		else
+			usleep_range(duration_us, duration_us + 100);
+		gpiod_set_value(gpiod_reset, 0);
+	}
+
+	return ret;
+
+err_disable_clks:
+	while (--clk >= 0)
+		clk_disable_unprepare(pwrseq_gen->clks[clk]);
+
+	return ret;
+}
+
+static int pwrseq_generic_get(struct device_node *np, struct pwrseq *pwrseq)
+{
+	struct pwrseq_generic *pwrseq_gen = to_generic_pwrseq(pwrseq);
+	enum of_gpio_flags flags;
+	int reset_gpio, clk, ret = 0;
+
+	for (clk = 0; clk < PWRSEQ_MAX_CLKS; clk++) {
+		pwrseq_gen->clks[clk] = of_clk_get(np, clk);
+		if (IS_ERR(pwrseq_gen->clks[clk])) {
+			ret = PTR_ERR(pwrseq_gen->clks[clk]);
+			if (ret != -ENOENT)
+				goto err_put_clks;
+			pwrseq_gen->clks[clk] = NULL;
+			break;
+		}
+	}
+
+	reset_gpio = of_get_named_gpio_flags(np, "reset-gpios", 0, &flags);
+	if (gpio_is_valid(reset_gpio)) {
+		unsigned long gpio_flags;
+
+		if (flags & OF_GPIO_ACTIVE_LOW)
+			gpio_flags = GPIOF_ACTIVE_LOW | GPIOF_OUT_INIT_LOW;
+		else
+			gpio_flags = GPIOF_OUT_INIT_HIGH;
+
+		ret = gpio_request_one(reset_gpio, gpio_flags,
+				"pwrseq-reset-gpios");
+		if (ret)
+			goto err_put_clks;
+
+		pwrseq_gen->gpiod_reset = gpio_to_desc(reset_gpio);
+		of_property_read_u32(np, "reset-duration-us",
+				&pwrseq_gen->duration_us);
+	} else if (reset_gpio == -ENOENT) {
+		; /* no such gpio */
+	} else {
+		ret = reset_gpio;
+		pr_err("Failed to get reset gpio on %s, err = %d\n",
+				np->full_name, reset_gpio);
+		goto err_put_clks;
+	}
+
+	/* allocate new one for later pwrseq instance request */
+	ret = pwrseq_generic_alloc_instance();
+	if (ret)
+		goto err_put_gpio;
+
+	return 0;
+
+err_put_gpio:
+	if (pwrseq_gen->gpiod_reset)
+		gpiod_put(pwrseq_gen->gpiod_reset);
+err_put_clks:
+	while (--clk >= 0)
+		clk_put(pwrseq_gen->clks[clk]);
+	return ret;
+}
+
+static int pwrseq_generic_alloc_instance(void)
+{
+	struct pwrseq_generic *pwrseq_gen;
+
+	pwrseq_gen = kzalloc(sizeof(*pwrseq_gen), GFP_KERNEL);
+	if (!pwrseq_gen)
+		return -ENOMEM;
+
+	pwrseq_gen->pwrseq.pwrseq_of_match_table = generic_id_table;
+	pwrseq_gen->pwrseq.get = pwrseq_generic_get;
+	pwrseq_gen->pwrseq.on = pwrseq_generic_on;
+	pwrseq_gen->pwrseq.off = pwrseq_generic_off;
+	pwrseq_gen->pwrseq.put = pwrseq_generic_put;
+
+	pwrseq_register(&pwrseq_gen->pwrseq);
+	return 0;
+}
+
+static int __init pwrseq_generic_register(void)
+{
+	return pwrseq_generic_alloc_instance();
+}
+postcore_initcall(pwrseq_generic_register)
diff --git a/drivers/spi/spidev.c b/drivers/spi/spidev.c
index 2e05046..3c6dfad 100644
--- a/drivers/spi/spidev.c
+++ b/drivers/spi/spidev.c
@@ -751,11 +751,11 @@ static int spidev_probe(struct spi_device *spi)
 	 * compatible string, it is a Linux implementation thing
 	 * rather than a description of the hardware.
 	 */
-	if (spi->dev.of_node && !of_match_device(spidev_dt_ids, &spi->dev)) {
-		dev_err(&spi->dev, "buggy DT: spidev listed directly in DT\n");
-		WARN_ON(spi->dev.of_node &&
-			!of_match_device(spidev_dt_ids, &spi->dev));
-	}
+//	if (spi->dev.of_node && !of_match_device(spidev_dt_ids, &spi->dev)) {
+//		dev_err(&spi->dev, "buggy DT: spidev listed directly in DT\n");
+//		WARN_ON(spi->dev.of_node &&
+//			!of_match_device(spidev_dt_ids, &spi->dev));
+//	}
 
 	spidev_probe_acpi(spi);
 
diff --git a/drivers/tty/serial/8250/8250_omap.c b/drivers/tty/serial/8250/8250_omap.c
index da31159..2c2d09c 100644
--- a/drivers/tty/serial/8250/8250_omap.c
+++ b/drivers/tty/serial/8250/8250_omap.c
@@ -1437,10 +1437,10 @@ static int __init omap8250_console_fixup(void)
 	}
 
 	add_preferred_console("ttyS", idx, options);
-	pr_err("WARNING: Your 'console=ttyO%d' has been replaced by 'ttyS%d'\n",
+	pr_info("WARNING: Your 'console=ttyO%d' has been replaced by 'ttyS%d'\n",
 	       idx, idx);
-	pr_err("This ensures that you still see kernel messages. Please\n");
-	pr_err("update your kernel commandline.\n");
+	pr_info("This ensures that you still see kernel messages. Please\n");
+	pr_info("update your kernel commandline.\n");
 	return 0;
 }
 console_initcall(omap8250_console_fixup);
diff --git a/drivers/tty/serial/omap-serial.c b/drivers/tty/serial/omap-serial.c
index 472ba3c..4eb25f8 100644
--- a/drivers/tty/serial/omap-serial.c
+++ b/drivers/tty/serial/omap-serial.c
@@ -1595,6 +1595,31 @@ static int serial_omap_probe_rs485(struct uart_omap_port *up,
 	return 0;
 }
 
+static int serial_omap_of_get_port_line(struct device_node *np)
+{
+	unsigned long val;
+	const char *hwmod;
+	int ret;
+
+	/* first try the serial alias */
+	ret = of_alias_get_id(np, "serial");
+	if (ret >= 0)
+		return ret;
+
+	/* no? calculate it from hwmods */
+	ret = of_property_read_string(np, "ti,hwmods", &hwmod);
+	if (ret != 0 || strncmp(hwmod, "uart", 4) ||
+			kstrtoul(hwmod + 4, 10, &val))
+		return -ENODEV;
+
+	/* numbering of hwmods is +1 */
+	ret = (int)val - 1;
+	if (ret < 0)
+		return -ENODEV;
+
+	return ret;
+}
+
 static int serial_omap_probe(struct platform_device *pdev)
 {
 	struct omap_uart_port_info *omap_up_info = dev_get_platdata(&pdev->dev);
@@ -1612,7 +1637,10 @@ static int serial_omap_probe(struct platform_device *pdev)
 			return -EPROBE_DEFER;
 		wakeirq = irq_of_parse_and_map(pdev->dev.of_node, 1);
 		omap_up_info = of_get_uart_port_info(&pdev->dev);
-		pdev->dev.platform_data = omap_up_info;
+		ret = platform_device_add_data(pdev, omap_up_info,
+				sizeof(*omap_up_info));
+		if (ret != 0)
+			return ret;
 	} else {
 		uartirq = platform_get_irq(pdev, 0);
 		if (uartirq < 0)
@@ -1638,7 +1666,7 @@ static int serial_omap_probe(struct platform_device *pdev)
 	up->port.ops = &serial_omap_pops;
 
 	if (pdev->dev.of_node)
-		ret = of_alias_get_id(pdev->dev.of_node, "serial");
+		ret = serial_omap_of_get_port_line(pdev->dev.of_node);
 	else
 		ret = pdev->id;
 
diff --git a/drivers/uio/Kconfig b/drivers/uio/Kconfig
index 52c98ce..05400bc 100644
--- a/drivers/uio/Kconfig
+++ b/drivers/uio/Kconfig
@@ -129,7 +129,7 @@ config UIO_PRUSS
 	select GENERIC_ALLOCATOR
 	depends on HAS_IOMEM && HAS_DMA
 	help
-	  PRUSS driver for OMAPL138/DA850/AM18XX devices
+	  PRUSS driver for OMAPL138/DA850/AM18XX and AM33XX devices
 	  PRUSS driver requires user space components, examples and user space
 	  driver is available from below SVN repo - you may use anonymous login
 
diff --git a/drivers/uio/uio_pruss.c b/drivers/uio/uio_pruss.c
index ca9e2fa..6559752 100644
--- a/drivers/uio/uio_pruss.c
+++ b/drivers/uio/uio_pruss.c
@@ -19,6 +19,7 @@
 #include <linux/module.h>
 #include <linux/moduleparam.h>
 #include <linux/platform_device.h>
+#include <linux/of_gpio.h>
 #include <linux/uio_driver.h>
 #include <linux/platform_data/uio_pruss.h>
 #include <linux/io.h>
@@ -27,6 +28,11 @@
 #include <linux/sizes.h>
 #include <linux/slab.h>
 #include <linux/genalloc.h>
+#include <linux/of_address.h>
+#include <linux/of_device.h>
+#include <linux/pinctrl/consumer.h>
+#include <linux/err.h>
+#include <linux/pm_runtime.h>
 
 #define DRV_NAME "pruss_uio"
 #define DRV_VERSION "1.0"
@@ -106,10 +112,12 @@ static void pruss_cleanup(struct device *dev, struct uio_pruss_dev *gdev)
 		dma_free_coherent(dev, extram_pool_sz, gdev->ddr_vaddr,
 			gdev->ddr_paddr);
 	}
+#ifdef CONFIG_ARCH_DAVINCI_DA850
 	if (gdev->sram_vaddr)
 		gen_pool_free(gdev->sram_pool,
 			      gdev->sram_vaddr,
 			      sram_pool_sz);
+#endif
 	kfree(gdev->info);
 	clk_put(gdev->pruss_clk);
 	kfree(gdev);
@@ -120,9 +128,15 @@ static int pruss_probe(struct platform_device *pdev)
 	struct uio_info *p;
 	struct uio_pruss_dev *gdev;
 	struct resource *regs_prussio;
+	struct resource res;
 	struct device *dev = &pdev->dev;
 	int ret = -ENODEV, cnt = 0, len;
 	struct uio_pruss_pdata *pdata = dev_get_platdata(dev);
+	struct pinctrl *pinctrl;
+
+	int count;
+	struct device_node *child;
+	const char *pin_name;
 
 	gdev = kzalloc(sizeof(struct uio_pruss_dev), GFP_KERNEL);
 	if (!gdev)
@@ -133,7 +147,7 @@ static int pruss_probe(struct platform_device *pdev)
 		kfree(gdev);
 		return -ENOMEM;
 	}
-
+#ifdef CONFIG_ARCH_DAVINCI_DA850
 	/* Power on PRU in case its not done as part of boot-loader */
 	gdev->pruss_clk = clk_get(dev, "pruss");
 	if (IS_ERR(gdev->pruss_clk)) {
@@ -145,8 +159,25 @@ static int pruss_probe(struct platform_device *pdev)
 	} else {
 		clk_enable(gdev->pruss_clk);
 	}
+#endif
+
+	if (pdev->dev.of_node) {
+		pm_runtime_enable(&pdev->dev);
+		ret = pm_runtime_get_sync(&pdev->dev);
+		if (IS_ERR_VALUE(ret)) {
+			dev_err(&pdev->dev, "pm_runtime_get_sync() failed\n");
+			return ret;
+		}
 
-	regs_prussio = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+		ret = of_address_to_resource(pdev->dev.of_node, 0, &res);
+		if (IS_ERR_VALUE(ret)) {
+			dev_err(&pdev->dev, "failed to parse DT reg\n");
+			return ret;
+		}
+		regs_prussio = &res;
+	}
+	else
+		regs_prussio = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	if (!regs_prussio) {
 		dev_err(dev, "No PRUSS I/O resource specified\n");
 		goto out_free;
@@ -157,7 +188,50 @@ static int pruss_probe(struct platform_device *pdev)
 		goto out_free;
 	}
 
-	if (pdata->sram_pool) {
+
+	pinctrl = devm_pinctrl_get_select_default(&pdev->dev);
+	if (IS_ERR(pinctrl))
+		dev_warn(&pdev->dev,
+			"pins are not configured from the driver\n");
+	else{
+		count = of_get_child_count(pdev->dev.of_node);
+		if (!count){
+			dev_info(&pdev->dev, "No children\n");
+			return -ENODEV;
+		}
+		// Run through all children. They have lables for easy reference.
+		for_each_child_of_node(pdev->dev.of_node, child){
+			enum of_gpio_flags flags;
+			unsigned gpio;
+
+			count = of_gpio_count(child);
+
+			ret = of_property_count_strings(child, "pin-names");
+			if (ret < 0) {
+				dev_err(&pdev->dev, "Failed to get pin-names\n");
+				continue;
+			}
+			if(count != ret){
+				dev_err(&pdev->dev, "The number of gpios (%d) does not match"\
+					" the number of pin names (%d)\n", count, ret);
+				continue;
+			}
+
+			for(cnt=0; cnt<count; cnt++){
+				ret = of_property_read_string_index(child,
+					"pin-names", cnt, &pin_name);
+				if (ret != 0)
+					dev_err(&pdev->dev, "Error on pin-name #%d\n", cnt);
+				gpio = of_get_gpio_flags(child, cnt, &flags);
+				ret = devm_gpio_request_one(&pdev->dev, gpio, flags, pin_name);
+				if (ret < 0) {
+		                        dev_err(dev, "Failed to request GPIO %d (%s) flags: '%d', error %d\n",
+					gpio, pin_name, flags, ret);
+		                }
+			}
+		}
+	}
+	if (pdata && pdata->sram_pool) {
 		gdev->sram_pool = pdata->sram_pool;
 		gdev->sram_vaddr =
 			(unsigned long)gen_pool_dma_alloc(gdev->sram_pool,
@@ -182,7 +256,17 @@ static int pruss_probe(struct platform_device *pdev)
 		goto out_free;
 	}
 
-	gdev->pintc_base = pdata->pintc_base;
+	if (pdev->dev.of_node) {
+		ret = of_property_read_u32(pdev->dev.of_node,
+					   "ti,pintc-offset",
+					   &gdev->pintc_base);
+		if (ret < 0) {
+			dev_err(&pdev->dev,
+				"Can't parse ti,pintc-offset property\n");
+			goto out_free;
+		}
+	} else
+		gdev->pintc_base = pdata->pintc_base;
 	gdev->hostirq_start = platform_get_irq(pdev, 0);
 
 	for (cnt = 0, p = gdev->info; cnt < MAX_PRUSS_EVT; cnt++, p++) {
@@ -190,6 +274,7 @@ static int pruss_probe(struct platform_device *pdev)
 		p->mem[0].size = resource_size(regs_prussio);
 		p->mem[0].memtype = UIO_MEM_PHYS;
 
+#ifdef CONFIG_ARCH_DAVINCI_DA850
 		p->mem[1].addr = gdev->sram_paddr;
 		p->mem[1].size = sram_pool_sz;
 		p->mem[1].memtype = UIO_MEM_PHYS;
@@ -197,7 +282,11 @@ static int pruss_probe(struct platform_device *pdev)
 		p->mem[2].addr = gdev->ddr_paddr;
 		p->mem[2].size = extram_pool_sz;
 		p->mem[2].memtype = UIO_MEM_PHYS;
-
+#else
+		p->mem[1].addr = gdev->ddr_paddr;
+		p->mem[1].size = extram_pool_sz;
+		p->mem[1].memtype = UIO_MEM_PHYS;
+#endif
 		p->name = kasprintf(GFP_KERNEL, "pruss_evt%d", cnt);
 		p->version = DRV_VERSION;
 
@@ -227,11 +316,20 @@ static int pruss_remove(struct platform_device *dev)
 	return 0;
 }
 
+static const struct of_device_id pruss_dt_ids[] = {
+	{ .compatible = "ti,pruss-v1", .data = NULL, },
+	{ .compatible = "ti,pruss-v2", .data = NULL, },
+	{},
+};
+MODULE_DEVICE_TABLE(of, pruss_dt_ids);
+
+
 static struct platform_driver pruss_driver = {
 	.probe = pruss_probe,
 	.remove = pruss_remove,
 	.driver = {
 		   .name = DRV_NAME,
+		   .of_match_table = pruss_dt_ids,
 		   },
 };
 
diff --git a/drivers/usb/chipidea/core.c b/drivers/usb/chipidea/core.c
index 6e0d614..17e69cb 100644
--- a/drivers/usb/chipidea/core.c
+++ b/drivers/usb/chipidea/core.c
@@ -896,6 +896,16 @@ static int ci_hdrc_probe(struct platform_device *pdev)
 		return -ENODEV;
 	}
 
+	/*
+	 * At device tree, we have no device node for chipidea core,
+	 * the glue layer's node is the parent node for host and udc
+	 * device. But in related driver, the parent device is chipidea
+	 * core. So, in order to let the common driver get parent's node,
+	 * we let the core's device node equals glue layer's node.
+	 */
+	if (dev->parent && dev->parent->of_node)
+		dev->of_node = dev->parent->of_node;
+
 	if (ci->platdata->phy) {
 		ci->phy = ci->platdata->phy;
 	} else if (ci->platdata->usb_phy) {
@@ -906,11 +916,15 @@ static int ci_hdrc_probe(struct platform_device *pdev)
 
 		/* if both generic PHY and USB PHY layers aren't enabled */
 		if (PTR_ERR(ci->phy) == -ENOSYS &&
-				PTR_ERR(ci->usb_phy) == -ENXIO)
-			return -ENXIO;
+				PTR_ERR(ci->usb_phy) == -ENXIO) {
+			ret = -ENXIO;
+			goto clear_of_node;
+		}
 
-		if (IS_ERR(ci->phy) && IS_ERR(ci->usb_phy))
-			return -EPROBE_DEFER;
+		if (IS_ERR(ci->phy) && IS_ERR(ci->usb_phy)) {
+			ret = -EPROBE_DEFER;
+			goto clear_of_node;
+		}
 
 		if (IS_ERR(ci->phy))
 			ci->phy = NULL;
@@ -921,7 +935,7 @@ static int ci_hdrc_probe(struct platform_device *pdev)
 	ret = ci_usb_phy_init(ci);
 	if (ret) {
 		dev_err(dev, "unable to init phy: %d\n", ret);
-		return ret;
+		goto clear_of_node;
 	}
 
 	ci->hw_bank.phys = res->start;
@@ -1027,6 +1041,8 @@ stop:
 	ci_role_destroy(ci);
 deinit_phy:
 	ci_usb_phy_exit(ci);
+clear_of_node:
+	dev->of_node = NULL;
 
 	return ret;
 }
@@ -1045,6 +1061,7 @@ static int ci_hdrc_remove(struct platform_device *pdev)
 	ci_extcon_unregister(ci);
 	ci_role_destroy(ci);
 	ci_hdrc_enter_lpm(ci, true);
+	ci->dev->of_node = NULL;
 	ci_usb_phy_exit(ci);
 
 	return 0;
diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index d0d3f9e..59f0db1 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -26,6 +26,7 @@
 #include <linux/mutex.h>
 #include <linux/random.h>
 #include <linux/pm_qos.h>
+#include <linux/power/pwrseq.h>
 
 #include <asm/uaccess.h>
 #include <asm/byteorder.h>
@@ -1632,6 +1633,7 @@ static void hub_disconnect(struct usb_interface *intf)
 	hub->error = 0;
 	hub_quiesce(hub, HUB_DISCONNECT);
 
+	of_pwrseq_off_list(&hub->pwrseq_on_list);
 	mutex_lock(&usb_port_peer_mutex);
 
 	/* Avoid races with recursively_mark_NOTATTACHED() */
@@ -1659,12 +1661,41 @@ static void hub_disconnect(struct usb_interface *intf)
 	kref_put(&hub->kref, hub_release);
 }
 
+#ifdef CONFIG_OF
+static int hub_of_pwrseq_on(struct usb_hub *hub)
+{
+	struct device *parent;
+	struct usb_device *hdev = hub->hdev;
+	struct device_node *np;
+	int ret;
+
+	if (hdev->parent)
+		parent = &hdev->dev;
+	else
+		parent = bus_to_hcd(hdev->bus)->self.controller;
+
+	for_each_child_of_node(parent->of_node, np) {
+		ret = of_pwrseq_on_list(np, &hub->pwrseq_on_list);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+#else
+static int hub_of_pwrseq_on(struct usb_hub *hub)
+{
+	return 0;
+}
+#endif
+
 static int hub_probe(struct usb_interface *intf, const struct usb_device_id *id)
 {
 	struct usb_host_interface *desc;
 	struct usb_endpoint_descriptor *endpoint;
 	struct usb_device *hdev;
 	struct usb_hub *hub;
+	int ret = -ENODEV;
 
 	desc = intf->cur_altsetting;
 	hdev = interface_to_usbdev(intf);
@@ -1769,6 +1800,7 @@ descriptor_error:
 	INIT_DELAYED_WORK(&hub->leds, led_work);
 	INIT_DELAYED_WORK(&hub->init_work, NULL);
 	INIT_WORK(&hub->events, hub_event);
+	INIT_LIST_HEAD(&hub->pwrseq_on_list);
 	usb_get_intf(intf);
 	usb_get_dev(hdev);
 
@@ -1782,11 +1814,14 @@ descriptor_error:
 	if (id->driver_info & HUB_QUIRK_CHECK_PORT_AUTOSUSPEND)
 		hub->quirk_check_port_auto_suspend = 1;
 
-	if (hub_configure(hub, endpoint) >= 0)
-		return 0;
+	if (hub_configure(hub, endpoint) >= 0) {
+		ret = hub_of_pwrseq_on(hub);
+		if (!ret)
+			return 0;
+	}
 
 	hub_disconnect(intf);
-	return -ENODEV;
+	return ret;
 }
 
 static int
diff --git a/drivers/usb/core/hub.h b/drivers/usb/core/hub.h
index 34c1a7e..cd86f91 100644
--- a/drivers/usb/core/hub.h
+++ b/drivers/usb/core/hub.h
@@ -78,6 +78,7 @@ struct usb_hub {
 	struct delayed_work	init_work;
 	struct work_struct      events;
 	struct usb_port		**ports;
+	struct list_head	pwrseq_on_list; /* powered pwrseq node list */
 };
 
 /**
diff --git a/drivers/usb/gadget/function/f_rndis.c b/drivers/usb/gadget/function/f_rndis.c
index ba00cdb..4a52025 100644
--- a/drivers/usb/gadget/function/f_rndis.c
+++ b/drivers/usb/gadget/function/f_rndis.c
@@ -178,9 +178,9 @@ rndis_iad_descriptor = {
 
 	.bFirstInterface =	0, /* XXX, hardcoded */
 	.bInterfaceCount = 	2,	// control + data
-	.bFunctionClass =	USB_CLASS_COMM,
-	.bFunctionSubClass =	USB_CDC_SUBCLASS_ETHERNET,
-	.bFunctionProtocol =	USB_CDC_PROTO_NONE,
+	.bFunctionClass =	USB_CLASS_MISC,
+	.bFunctionSubClass =	0x04,
+	.bFunctionProtocol =	0x01,
 	/* .iFunction = DYNAMIC */
 };
 
diff --git b/firmware/am335x-bone-scale-data.bin b/firmware/am335x-bone-scale-data.bin
new file mode 100644
index 0000000000000000000000000000000000000000..1ce3c1c596d7a7f400b0cc89bda5a41eed2780c5
GIT binary patch
literal 73
pcmd-HXHZUIU{c}EWl|AfLZWk+R0P|Ad@#)bSHb~R0-{lr003gr3L5|b

literal 0
HcmV?d00001

diff --git b/firmware/am335x-evm-scale-data.bin b/firmware/am335x-evm-scale-data.bin
new file mode 100644
index 0000000000000000000000000000000000000000..a222389d233fa8f1c76ef35470b57284999c8df5
GIT binary patch
literal 17
Ucmd-HXJAiZVA55UX8=>$024d{B>(^b

literal 0
HcmV?d00001

diff --git b/firmware/am43x-evm-scale-data.bin b/firmware/am43x-evm-scale-data.bin
new file mode 100644
index 0000000000000000000000000000000000000000..2d71341089816be7989e6dc11b265d9194ac909e
GIT binary patch
literal 41
hcmd-HXAn+dU{VptW>OLB0@CSBDpG9>aG{wnApnMi2I2q!

literal 0
HcmV?d00001

diff --git b/include/dt-bindings/board/am335x-bbw-bbb-base.h b/include/dt-bindings/board/am335x-bbw-bbb-base.h
new file mode 100644
index 0000000..35f6d57
--- /dev/null
+++ b/include/dt-bindings/board/am335x-bbw-bbb-base.h
@@ -0,0 +1,103 @@
+/*
+ * This header provides constants for bbw/bbb pinctrl bindings.
+ *
+ * Copyright (C) 2014 Robert Nelson <robertcnelson@gmail.com>
+ *
+ * Numbers Based on: https://github.com/derekmolloy/boneDeviceTree/tree/master/docs
+ */
+
+#ifndef _DT_BINDINGS_BOARD_AM335X_BBW_BBB_BASE_H
+#define _DT_BINDINGS_BOARD_AM335X_BBW_BBB_BASE_H
+
+#define BONE_P8_03 0x018
+#define BONE_P8_04 0x01C
+
+#define BONE_P8_05 0x008
+#define BONE_P8_06 0x00C
+#define BONE_P8_07 0x090
+#define BONE_P8_08 0x094
+
+#define BONE_P8_09 0x09C
+#define BONE_P8_10 0x098
+#define BONE_P8_11 0x034
+#define BONE_P8_12 0x030
+
+#define BONE_P8_13 0x024
+#define BONE_P8_14 0x028
+#define BONE_P8_15 0x03C
+#define BONE_P8_16 0x038
+
+#define BONE_P8_17 0x02C
+#define BONE_P8_18 0x08C
+#define BONE_P8_19 0x020
+#define BONE_P8_20 0x084
+
+#define BONE_P8_21 0x080
+#define BONE_P8_22 0x014
+#define BONE_P8_23 0x010
+#define BONE_P8_24 0x004
+
+#define BONE_P8_25 0x000
+#define BONE_P8_26 0x07C
+#define BONE_P8_27 0x0E0
+#define BONE_P8_28 0x0E8
+
+#define BONE_P8_29 0x0E4
+#define BONE_P8_30 0x0EC
+#define BONE_P8_31 0x0D8
+#define BONE_P8_32 0x0DC
+
+#define BONE_P8_33 0x0D4
+#define BONE_P8_34 0x0CC
+#define BONE_P8_35 0x0D0
+#define BONE_P8_36 0x0C8
+
+#define BONE_P8_37 0x0C0
+#define BONE_P8_38 0x0C4
+#define BONE_P8_39 0x0B8
+#define BONE_P8_40 0x0BC
+
+#define BONE_P8_41 0x0B0
+#define BONE_P8_42 0x0B4
+#define BONE_P8_43 0x0A8
+#define BONE_P8_44 0x0AC
+
+#define BONE_P8_45 0x0A0
+#define BONE_P8_46 0x0A4
+
+#define BONE_P9_11 0x070
+#define BONE_P9_12 0x078
+
+#define BONE_P9_13 0x074
+#define BONE_P9_14 0x048
+#define BONE_P9_15 0x040
+#define BONE_P9_16 0x04C
+
+#define BONE_P9_17 0x15C
+#define BONE_P9_18 0x158
+#define BONE_P9_19 0x17C
+#define BONE_P9_20 0x178
+
+#define BONE_P9_21 0x154
+#define BONE_P9_22 0x150
+#define BONE_P9_23 0x044
+#define BONE_P9_24 0x184
+
+#define BONE_P9_25 0x1AC
+#define BONE_P9_26 0x180
+#define BONE_P9_27 0x1A4
+#define BONE_P9_28 0x19C
+
+#define BONE_P9_29 0x194
+#define BONE_P9_30 0x198
+#define BONE_P9_31 0x190
+
+/* Shared P21 of P11 */
+#define BONE_P9_41A 0x1B4
+#define BONE_P9_41B 0x1A8
+
+/* Shared P22 of P11 */
+#define BONE_P9_42A 0x164
+#define BONE_P9_42B 0x1A0
+
+#endif
diff --git b/include/dt-bindings/mfd/tps65217.h b/include/dt-bindings/mfd/tps65217.h
new file mode 100644
index 0000000..cafb9e6
--- /dev/null
+++ b/include/dt-bindings/mfd/tps65217.h
@@ -0,0 +1,26 @@
+/*
+ * This header provides macros for TI TPS65217 DT bindings.
+ *
+ * Copyright (C) 2016 Texas Instruments
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __DT_BINDINGS_TPS65217_H__
+#define __DT_BINDINGS_TPS65217_H__
+
+#define TPS65217_IRQ_USB	0
+#define TPS65217_IRQ_AC		1
+#define TPS65217_IRQ_PB		2
+
+#endif
diff --git a/include/dt-bindings/pinctrl/dra.h b/include/dt-bindings/pinctrl/dra.h
index 5c75e80..5a60e3b 100644
--- a/include/dt-bindings/pinctrl/dra.h
+++ b/include/dt-bindings/pinctrl/dra.h
@@ -50,6 +50,8 @@
 
 #define MODE_SELECT		(1 << 8)
 
+#define MANUAL_MODE		MODE_SELECT
+
 #define PULL_ENA		(0 << 16)
 #define PULL_DIS		(1 << 16)
 #define PULL_UP			(1 << 17)
@@ -73,5 +75,9 @@
  */
 #define DRA7XX_CORE_IOPAD(pa, val)	(((pa) & 0xffff) - 0x3400) (val)
 
+/* DRA7 IODELAY configuration parameters */
+#define A_DELAY(val)		((val) & 0xFFFF)
+#define G_DELAY(val)		(((val) & 0xFFFF) << 16)
+
 #endif
 
diff --git a/include/linux/mfd/tps65217.h b/include/linux/mfd/tps65217.h
index 4ccda89..3cbec4b 100644
--- a/include/linux/mfd/tps65217.h
+++ b/include/linux/mfd/tps65217.h
@@ -234,12 +234,11 @@ struct tps65217_bl_pdata {
 	int dft_brightness;
 };
 
-enum tps65217_irq_type {
-	TPS65217_IRQ_PB,
-	TPS65217_IRQ_AC,
-	TPS65217_IRQ_USB,
-	TPS65217_NUM_IRQ
-};
+/* Interrupt numbers */
+#define TPS65217_IRQ_USB		0
+#define TPS65217_IRQ_AC			1
+#define TPS65217_IRQ_PB			2
+#define TPS65217_NUM_IRQ		3
 
 /**
  * struct tps65217_board - packages regulator init data
diff --git a/include/linux/of.h b/include/linux/of.h
index 299aeb1..725bd28 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -23,8 +23,10 @@
 #include <linux/spinlock.h>
 #include <linux/topology.h>
 #include <linux/notifier.h>
+#include <linux/slab.h>
 #include <linux/property.h>
 #include <linux/list.h>
+#include <linux/rhashtable.h>
 
 #include <asm/byteorder.h>
 #include <asm/errno.h>
@@ -52,6 +54,7 @@ struct device_node {
 	phandle phandle;
 	const char *full_name;
 	struct fwnode_handle fwnode;
+	struct rhash_head ht_node;
 
 	struct	property *properties;
 	struct	property *deadprops;	/* removed properties */
@@ -1184,6 +1187,8 @@ enum of_reconfig_change {
 };
 
 #ifdef CONFIG_OF_DYNAMIC
+#include <linux/slab.h>
+
 extern int of_reconfig_notifier_register(struct notifier_block *);
 extern int of_reconfig_notifier_unregister(struct notifier_block *);
 extern int of_reconfig_notify(unsigned long, struct of_reconfig_data *rd);
@@ -1227,6 +1232,26 @@ static inline int of_changeset_update_property(struct of_changeset *ocs,
 {
 	return of_changeset_action(ocs, OF_RECONFIG_UPDATE_PROPERTY, np, prop);
 }
+
+struct device_node *of_changeset_create_device_nodev(
+	struct of_changeset *ocs, struct device_node *parent,
+	const char *fmt, va_list vargs);
+
+__printf(3, 4) struct device_node *
+of_changeset_create_device_node(struct of_changeset *ocs,
+	struct device_node *parent, const char *fmt, ...);
+
+int __of_changeset_add_update_property_copy(struct of_changeset *ocs,
+		struct device_node *np, const char *name, const void *value,
+		int length, bool update);
+
+int __of_changeset_add_update_property_string_list(
+		struct of_changeset *ocs, struct device_node *np,
+		const char *name, const char **strs, int count, bool update);
+
+int of_changeset_node_move(struct of_changeset *ocs,
+	struct device_node *np, struct device_node *new_parent);
+
 #else /* CONFIG_OF_DYNAMIC */
 static inline int of_reconfig_notifier_register(struct notifier_block *nb)
 {
@@ -1246,8 +1271,323 @@ static inline int of_reconfig_get_state_change(unsigned long action,
 {
 	return -EINVAL;
 }
+
+static inline struct device_node *of_changeset_create_device_nodev(
+	struct of_changeset *ocs, struct device_node *parent,
+	const char *fmt, va_list vargs)
+{
+	return ERR_PTR(-EINVAL);
+}
+
+static inline __printf(3, 4) struct device_node *
+of_changeset_create_device_node(struct of_changeset *ocs,
+	struct device_node *parent, const char *fmt, ...)
+{
+	return ERR_PTR(-EINVAL);
+}
+
+static inline int __of_changeset_add_update_property_copy(
+	struct of_changeset *ocs, struct device_node *np,
+	const char *name, const void *value, int length, bool update)
+{
+	return -EINVAL;
+}
+
+static inline __printf(4, 5) int of_changeset_add_property_stringf(
+		struct of_changeset *ocs, struct device_node *np,
+		const char *name, const char *fmt, ...)
+{
+	return -EINVAL;
+}
+
+static inline int of_changeset_update_property_stringf(
+	struct of_changeset *ocs, struct device_node *np,
+	const char *name, const char *fmt, ...)
+{
+	return -EINVAL;
+}
+
+static inline int __of_changeset_add_update_property_string_list(
+		struct of_changeset *ocs, struct device_node *np,
+		const char *name, const char **strs, int count, bool update)
+{
+	return -EINVAL;
+}
+
+static inline int of_changeset_node_move(struct of_changeset *ocs,
+		struct device_node *np, struct device_node *new_parent)
+{
+	return -EINVAL;
+}
+
 #endif /* CONFIG_OF_DYNAMIC */
 
+/**
+ * of_changeset_add_property_copy - Create a new property copying name & value
+ *
+ * @ocs:	changeset pointer
+ * @np:		device node pointer
+ * @name:	name of the property
+ * @value:	pointer to the value data
+ * @length:	length of the value in bytes
+ *
+ * Adds a property to the changeset by making copies of the name & value
+ * entries.
+ *
+ * Returns zero on success, a negative error value otherwise.
+ */
+static inline int of_changeset_add_property_copy(struct of_changeset *ocs,
+	struct device_node *np, const char *name,
+	const void *value, int length)
+{
+	return __of_changeset_add_update_property_copy(ocs, np, name, value,
+			length, false);
+}
+
+/**
+ * of_changeset_update_property_copy - Update a property copying name & value
+ *
+ * @ocs:	changeset pointer
+ * @np:		device node pointer
+ * @name:	name of the property
+ * @value:	pointer to the value data
+ * @length:	length of the value in bytes
+ *
+ * Update a property to the changeset by making copies of the name & value
+ * entries.
+ *
+ * Returns zero on success, a negative error value otherwise.
+ */
+static inline int of_changeset_update_property_copy(struct of_changeset *ocs,
+	struct device_node *np, const char *name,
+	const void *value, int length)
+{
+	return __of_changeset_add_update_property_copy(ocs, np, name, value,
+			length, true);
+}
+
+/**
+ * __of_changeset_add_update_property_string - Create/update a string property
+ *
+ * @ocs:	changeset pointer
+ * @np:		device node pointer
+ * @name:	name of the property
+ * @str:	string property value
+ * @update:	True on update operation
+ *
+ * Adds/updates a string property to the changeset by making copies of the name
+ * and the given value. The @update parameter controls whether an add or
+ * update takes place.
+ *
+ * Returns zero on success, a negative error value otherwise.
+ */
+static inline int __of_changeset_add_update_property_string(
+	struct of_changeset *ocs, struct device_node *np, const char *name,
+	const char *str, bool update)
+{
+	return __of_changeset_add_update_property_copy(ocs, np, name, str,
+			strlen(str) + 1, update);
+}
+
+/**
+ * __of_changeset_add_update_property_stringv - Create/update a formatted
+ *						string property
+ *
+ * @ocs:	changeset pointer
+ * @np:		device node pointer
+ * @name:	name of the property
+ * @fmt:	format of string property
+ * @vargs:	arguments of the format string
+ * @update:	True on update operation
+ *
+ * Adds/updates a string property to the changeset by making copies of the name
+ * and the formatted value. The @update parameter controls whether an add or
+ * update takes place.
+ *
+ * Returns zero on success, a negative error value otherwise.
+ */
+static inline int __of_changeset_add_update_property_stringv(
+	struct of_changeset *ocs, struct device_node *np, const char *name,
+	const char *fmt, va_list vargs, bool update)
+{
+	char *str;
+	int ret;
+
+	str = kvasprintf(GFP_KERNEL, fmt, vargs);
+	if (!str)
+		return -ENOMEM;
+	ret = __of_changeset_add_update_property_string(ocs, np, name, str,
+			update);
+	kfree(str);
+
+	return ret;
+}
+
+/**
+ * of_changeset_add_property_string_list - Create a new string list property
+ *
+ * @ocs:	changeset pointer
+ * @np:		device node pointer
+ * @name:	name of the property
+ * @strs:	pointer to the string list
+ * @count:	string count
+ *
+ * Adds a string list property to the changeset.
+ *
+ * Returns zero on success, a negative error value otherwise.
+ */
+static inline int of_changeset_add_property_string_list(
+	struct of_changeset *ocs, struct device_node *np, const char *name,
+	const char **strs, int count)
+{
+	return __of_changeset_add_update_property_string_list(ocs, np, name,
+			strs, count, false);
+}
+
+/**
+ * of_changeset_update_property_string_list - Update string list property
+ *
+ * @ocs:	changeset pointer
+ * @np:		device node pointer
+ * @name:	name of the property
+ * @strs:	pointer to the string list
+ * @count:	string count
+ *
+ * Updates a string list property to the changeset.
+ *
+ * Returns zero on success, a negative error value otherwise.
+ */
+static inline int of_changeset_update_property_string_list(
+	struct of_changeset *ocs, struct device_node *np,
+	const char *name, const char **strs, int count)
+{
+	return __of_changeset_add_update_property_string_list(ocs, np, name,
+			strs, count, true);
+}
+
+/**
+ * of_changeset_add_property_string - Adds a string property
+ *
+ * @ocs:	changeset pointer
+ * @np:		device node pointer
+ * @name:	name of the property
+ * @str:	string property
+ *
+ * Adds a string property to the changeset by making copies of the name
+ * and the string value.
+ *
+ * Returns zero on success, a negative error value otherwise.
+ */
+static inline int of_changeset_add_property_string(
+	struct of_changeset *ocs, struct device_node *np,
+	const char *name, const char *str)
+{
+	return __of_changeset_add_update_property_string(ocs, np, name, str,
+			false);
+}
+
+/**
+ * of_changeset_update_property_string - Update a string property
+ *
+ * @ocs:	changeset pointer
+ * @np:		device node pointer
+ * @name:	name of the property
+ * @str:	string property
+ *
+ * Updates a string property to the changeset by making copies of the name
+ * and the string value.
+ *
+ * Returns zero on success, a negative error value otherwise.
+ */
+static inline int of_changeset_update_property_string(
+	struct of_changeset *ocs, struct device_node *np,
+	const char *name, const char *str)
+{
+	return __of_changeset_add_update_property_string(ocs, np, name, str,
+			true);
+}
+
+/**
+ * of_changeset_add_property_u32 - Create a new u32 property
+ *
+ * @ocs:	changeset pointer
+ * @np:		device node pointer
+ * @name:	name of the property
+ * @val:	value in host endian format
+ *
+ * Adds a u32 property to the changeset.
+ *
+ * Returns zero on success, a negative error value otherwise.
+ */
+static inline int of_changeset_add_property_u32(struct of_changeset *ocs,
+		struct device_node *np, const char *name, u32 val)
+{
+	val = cpu_to_be32(val);
+	return __of_changeset_add_update_property_copy(ocs, np, name, &val,
+			sizeof(val), false);
+}
+
+/**
+ * of_changeset_update_property_u32 - Update u32 property
+ *
+ * @ocs:	changeset pointer
+ * @np:		device node pointer
+ * @name:	name of the property
+ * @val:	value in host endian format
+ *
+ * Updates a u32 property to the changeset.
+ *
+ * Returns zero on success, a negative error value otherwise.
+ */
+static inline int of_changeset_update_property_u32(
+	struct of_changeset *ocs, struct device_node *np,
+	const char *name, u32 val)
+{
+	val = cpu_to_be32(val);
+	return __of_changeset_add_update_property_copy(ocs, np, name, &val,
+			sizeof(val), true);
+}
+
+/**
+ * of_changeset_add_property_bool - Create a new u32 property
+ *
+ * @ocs:	changeset pointer
+ * @np:		device node pointer
+ * @name:	name of the property
+ *
+ * Adds a bool property to the changeset. Note that there is
+ * no option to set the value to false, since the property
+ * existing sets it to true.
+ *
+ * Returns zero on success, a negative error value otherwise.
+ */
+static inline int of_changeset_add_property_bool(
+	struct of_changeset *ocs, struct device_node *np, const char *name)
+{
+	return __of_changeset_add_update_property_copy(ocs, np, name, "", 0,
+			false);
+}
+
+/**
+ * of_changeset_update_property_bool - Update a bool property
+ *
+ * @ocs:	changeset pointer
+ * @np:		device node pointer
+ * @name:	name of the property
+ *
+ * Updates a property to the changeset. Note that there is
+ * no option to set the value to false, since the property
+ * existing sets it to true.
+ *
+ * Returns zero on success, a negative error value otherwise.
+ */
+static inline int of_changeset_update_property_bool(struct of_changeset *ocs,
+		struct device_node *np, const char *name)
+{
+	return __of_changeset_add_update_property_copy(ocs, np, name, "", 0,
+			true);
+}
+
 /* CONFIG_OF_RESOLVE api */
 extern int of_resolve_phandles(struct device_node *tree);
 
@@ -1273,6 +1613,10 @@ int of_overlay_create(struct device_node *tree);
 int of_overlay_destroy(int id);
 int of_overlay_destroy_all(void);
 
+int of_overlay_create_target_index(struct device_node *tree, int index);
+int of_overlay_create_target_root(struct device_node *tree,
+		struct device_node *target_root);
+
 #else
 
 static inline int of_overlay_create(struct device_node *tree)
@@ -1290,6 +1634,18 @@ static inline int of_overlay_destroy_all(void)
 	return -ENOTSUPP;
 }
 
+static inline int of_overlay_create_target_index(struct device_node *tree,
+		int index)
+{
+	return -ENOTSUPP;
+}
+
+static inline int of_overlay_create_target_root(struct device_node *tree,
+		struct device_node *target_root)
+{
+	return -ENOTSUPP;
+}
+
 #endif
 
 #endif /* _LINUX_OF_H */
diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h
index f6bc765..e9be0ec 100644
--- a/include/linux/pm_opp.h
+++ b/include/linux/pm_opp.h
@@ -17,14 +17,64 @@
 #include <linux/err.h>
 #include <linux/notifier.h>
 
+struct clk;
+struct regulator;
 struct dev_pm_opp;
 struct device;
-struct opp_table;
 
 enum dev_pm_opp_event {
 	OPP_EVENT_ADD, OPP_EVENT_REMOVE, OPP_EVENT_ENABLE, OPP_EVENT_DISABLE,
 };
 
+/**
+ * struct dev_pm_opp_supply - Power supply voltage/current values
+ * @u_volt:	Target voltage in microvolts corresponding to this OPP
+ * @u_volt_min:	Minimum voltage in microvolts corresponding to this OPP
+ * @u_volt_max:	Maximum voltage in microvolts corresponding to this OPP
+ * @u_amp:	Maximum current drawn by the device in microamperes
+ *
+ * This structure stores the voltage/current values for a single power supply.
+ */
+struct dev_pm_opp_supply {
+	unsigned long u_volt;
+	unsigned long u_volt_min;
+	unsigned long u_volt_max;
+	unsigned long u_amp;
+};
+
+/**
+ * struct dev_pm_opp_info - OPP freq/voltage/current values
+ * @rate:	Target clk rate in hz
+ * @supplies:	Array of voltage/current values for all power supplies
+ *
+ * This structure stores the freq/voltage/current values for a single OPP.
+ */
+struct dev_pm_opp_info {
+	unsigned long rate;
+	struct dev_pm_opp_supply *supplies;
+};
+
+/**
+ * struct dev_pm_set_opp_data - Set OPP data
+ * @old_opp:	Old OPP info
+ * @new_opp:	New OPP info
+ * @regulators:	Array of regulator pointers
+ * @regulator_count: Number of regulators
+ * @clk:	Pointer to clk
+ * @dev:	Pointer to the struct device
+ *
+ * This structure contains all information required for setting an OPP.
+ */
+struct dev_pm_set_opp_data {
+	struct dev_pm_opp_info old_opp;
+	struct dev_pm_opp_info new_opp;
+
+	struct regulator **regulators;
+	unsigned int regulator_count;
+	struct clk *clk;
+	struct device *dev;
+};
+
 #if defined(CONFIG_PM_OPP)
 
 unsigned long dev_pm_opp_get_voltage(struct dev_pm_opp *opp);
@@ -63,8 +113,10 @@ int dev_pm_opp_set_supported_hw(struct device *dev, const u32 *versions,
 void dev_pm_opp_put_supported_hw(struct device *dev);
 int dev_pm_opp_set_prop_name(struct device *dev, const char *name);
 void dev_pm_opp_put_prop_name(struct device *dev);
-struct opp_table *dev_pm_opp_set_regulator(struct device *dev, const char *name);
-void dev_pm_opp_put_regulator(struct opp_table *opp_table);
+int dev_pm_opp_set_regulators(struct device *dev, const char * const names[], unsigned int count);
+void dev_pm_opp_put_regulators(struct device *dev);
+int dev_pm_opp_register_set_opp_helper(struct device *dev, int (*set_opp)(struct dev_pm_set_opp_data *data));
+void dev_pm_opp_register_put_opp_helper(struct device *dev);
 int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq);
 int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, const struct cpumask *cpumask);
 int dev_pm_opp_get_sharing_cpus(struct device *cpu_dev, struct cpumask *cpumask);
@@ -164,6 +216,14 @@ static inline int dev_pm_opp_set_supported_hw(struct device *dev,
 
 static inline void dev_pm_opp_put_supported_hw(struct device *dev) {}
 
+static inline int dev_pm_opp_register_set_opp_helper(struct device *dev,
+			int (*set_opp)(struct dev_pm_set_opp_data *data))
+{
+	return -ENOTSUPP;
+}
+
+static inline void dev_pm_opp_register_put_opp_helper(struct device *dev) {}
+
 static inline int dev_pm_opp_set_prop_name(struct device *dev, const char *name)
 {
 	return -ENOTSUPP;
@@ -171,12 +231,12 @@ static inline int dev_pm_opp_set_prop_name(struct device *dev, const char *name)
 
 static inline void dev_pm_opp_put_prop_name(struct device *dev) {}
 
-static inline struct opp_table *dev_pm_opp_set_regulator(struct device *dev, const char *name)
+static inline int dev_pm_opp_set_regulators(struct device *dev, const char *names[], unsigned int count)
 {
-	return ERR_PTR(-ENOTSUPP);
+	return -ENOTSUPP;
 }
 
-static inline void dev_pm_opp_put_regulator(struct opp_table *opp_table) {}
+static inline void dev_pm_opp_put_regulators(struct device *dev) {}
 
 static inline int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq)
 {
@@ -209,6 +269,7 @@ void dev_pm_opp_of_remove_table(struct device *dev);
 int dev_pm_opp_of_cpumask_add_table(const struct cpumask *cpumask);
 void dev_pm_opp_of_cpumask_remove_table(const struct cpumask *cpumask);
 int dev_pm_opp_of_get_sharing_cpus(struct device *cpu_dev, struct cpumask *cpumask);
+struct device_node *dev_pm_opp_of_get_opp_desc_node(struct device *dev);
 #else
 static inline int dev_pm_opp_of_add_table(struct device *dev)
 {
@@ -232,6 +293,11 @@ static inline int dev_pm_opp_of_get_sharing_cpus(struct device *cpu_dev, struct
 {
 	return -ENOTSUPP;
 }
+
+static inline struct device_node *dev_pm_opp_of_get_opp_desc_node(struct device *dev)
+{
+	return NULL;
+}
 #endif
 
 #endif		/* __LINUX_OPP_H__ */
diff --git b/include/linux/power/pwrseq.h b/include/linux/power/pwrseq.h
new file mode 100644
index 0000000..4f37275
--- /dev/null
+++ b/include/linux/power/pwrseq.h
@@ -0,0 +1,72 @@
+#ifndef __LINUX_PWRSEQ_H
+#define __LINUX_PWRSEQ_H
+
+#include <linux/of.h>
+
+#define PWRSEQ_MAX_CLKS		3
+
+struct pwrseq {
+	const struct of_device_id *pwrseq_of_match_table;
+	struct list_head node;
+	int (*get)(struct device_node *np, struct pwrseq *p);
+	int (*on)(struct pwrseq *p);
+	void (*off)(struct pwrseq *p);
+	void (*put)(struct pwrseq *p);
+	int (*suspend)(struct pwrseq *p);
+	int (*resume)(struct pwrseq *p);
+	bool used;
+};
+
+/* used for power sequence instance list in one driver */
+struct pwrseq_list_per_dev {
+	struct pwrseq *pwrseq;
+	struct list_head list;
+};
+
+#if IS_ENABLED(CONFIG_POWER_SEQUENCE)
+int pwrseq_get(struct device_node *np, struct pwrseq *p);
+int pwrseq_on(struct pwrseq *p);
+void pwrseq_off(struct pwrseq *p);
+void pwrseq_put(struct pwrseq *p);
+int pwrseq_suspend(struct pwrseq *p);
+int pwrseq_resume(struct pwrseq *p);
+void pwrseq_register(struct pwrseq *pwrseq);
+void pwrseq_unregister(struct pwrseq *pwrseq);
+struct pwrseq *of_pwrseq_on(struct device_node *np);
+void of_pwrseq_off(struct pwrseq *pwrseq);
+int of_pwrseq_on_list(struct device_node *np, struct list_head *head);
+void of_pwrseq_off_list(struct list_head *head);
+#else
+static inline int pwrseq_get(struct device_node *np, struct pwrseq *p)
+{
+	return 0;
+}
+static inline int pwrseq_on(struct pwrseq *p)
+{
+	return 0;
+}
+static inline void pwrseq_off(struct pwrseq *p) {}
+static inline void pwrseq_put(struct pwrseq *p) {}
+static inline int pwrseq_suspend(struct pwrseq *p)
+{
+	return 0;
+}
+static inline int pwrseq_resume(struct pwrseq *p)
+{
+	return 0;
+}
+static inline void pwrseq_register(struct pwrseq *pwrseq) {}
+static inline void pwrseq_unregister(struct pwrseq *pwrseq) {}
+static inline struct pwrseq *of_pwrseq_on(struct device_node *np)
+{
+	return NULL;
+}
+void of_pwrseq_off(struct pwrseq *pwrseq) {}
+int of_pwrseq_on_list(struct device_node *np, struct list_head *head)
+{
+	return 0;
+}
+void of_pwrseq_off_list(struct list_head *head) {}
+#endif /* CONFIG_POWER_SEQUENCE */
+
+#endif  /* __LINUX_PWRSEQ_H */
diff --git a/kernel/workqueue_internal.h b/kernel/workqueue_internal.h
index 29fa81f..8635417 100644
--- a/kernel/workqueue_internal.h
+++ b/kernel/workqueue_internal.h
@@ -9,7 +9,6 @@
 
 #include <linux/workqueue.h>
 #include <linux/kthread.h>
-#include <linux/preempt.h>
 
 struct worker_pool;
 
@@ -60,7 +59,7 @@ struct worker {
  */
 static inline struct worker *current_wq_worker(void)
 {
-	if (in_task() && (current->flags & PF_WQ_WORKER))
+	if (current->flags & PF_WQ_WORKER)
 		return kthread_data(current);
 	return NULL;
 }
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 0967771..319e09d 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -35,6 +35,7 @@
 #ifdef CONFIG_BLOCK
 #include <linux/blkdev.h>
 #endif
+#include <linux/of.h>
 
 #include "../mm/internal.h"	/* For the trace_print_flags arrays */
 
@@ -1470,6 +1471,141 @@ char *flags_string(char *buf, char *end, void *flags_ptr, const char *fmt)
 	return format_flags(buf, end, flags, names);
 }
 
+/* helper method for calculating extends on first pass  and filling in later */
+static noinline_for_stack
+void append_str(const char *str, int pass, int *lenp, char **bufp, char *end)
+{
+	int len;
+
+	len = strlen(str);
+	if (pass == 1)
+		*lenp += len;
+	else {
+		if (len > (end - *bufp))
+			len = end - *bufp;
+		memcpy(*bufp, str, len);
+		*bufp += len;
+	}
+}
+
+static noinline_for_stack
+char *device_node_string(char *buf, char *end, struct device_node *dn,
+			 struct printf_spec spec, const char *fmt)
+{
+	char tbuf[sizeof("xxxxxxxxxx") + 1];
+	const char *fmtp, *p;
+	int len, ret, i, j, pass;
+	char c;
+
+	if (!IS_ENABLED(CONFIG_OF)) {
+		/* if OF is not enabled just print the pointer */
+		spec.flags |= SMALL;
+		if (spec.field_width == -1) {
+			spec.field_width = 2 * sizeof(void *);
+			spec.flags |= ZEROPAD;
+		}
+		spec.base = 16;
+		return number(buf, end, (unsigned long) dn, spec);
+	}
+
+	if ((unsigned long)dn < PAGE_SIZE)
+		return string(buf, end, "(null)", spec);
+
+	/* simple case without anything any more format specifiers */
+	if (fmt[1] == '\0' || isspace(fmt[1]))
+		fmt = "Of";
+
+	len = 0;
+
+	/* two passes; the first calculates length, the second fills in */
+	for (pass = 1; pass <= 2; pass++) {
+		if (pass == 2 && !(spec.flags & LEFT)) {
+			/* padding */
+			while (len < spec.field_width--) {
+				if (buf < end)
+					*buf = ' ';
+				++buf;
+			}
+		}
+
+		for (fmtp = fmt + 1, j = 0; (c = *fmtp++) != '\0'; ) {
+
+			/* validate option */
+			if (c != 'f' && c != 'n' && c != 'p' && c != 'P' &&
+			    c != 'F' && c != 'c' && c != 'C' && c != 'r')
+				continue;
+
+			/* handle separator */
+			if (j++ > 0)
+				append_str("|", pass, &len, &buf, end);
+
+			switch (c) {
+			case 'f':	/* full_name */
+				append_str(of_node_full_name(dn), pass, &len,
+						&buf, end);
+				break;
+			case 'n':	/* name */
+				append_str(dn->name, pass, &len, &buf, end);
+				break;
+			case 'p':	/* phandle */
+				snprintf(tbuf, sizeof(tbuf), "%u",
+						(unsigned int)dn->phandle);
+				append_str(tbuf, pass, &len, &buf, end);
+				break;
+			case 'P':	/* path-spec */
+				append_str(dn->name, pass, &len, &buf, end);
+				/* need to tack on the @ postfix */
+				p = strchr(of_node_full_name(dn), '@');
+				if (p)
+					append_str(p, pass, &len, &buf, end);
+				break;
+			case 'F':	/* flags */
+				snprintf(tbuf, sizeof(tbuf), "%c%c%c%c",
+					of_node_check_flag(dn, OF_DYNAMIC) ?
+						'D' : '-',
+					of_node_check_flag(dn, OF_DETACHED) ?
+						'd' : '-',
+					of_node_check_flag(dn, OF_POPULATED) ?
+						'P' : '-',
+					of_node_check_flag(dn,
+						OF_POPULATED_BUS) ?  'B' : '-');
+				append_str(tbuf, pass, &len, &buf, end);
+				break;
+			case 'c':	/* major compatible string */
+				ret = of_property_read_string(dn, "compatible",
+						&p);
+				if (ret == 0)
+					append_str(p, pass, &len, &buf, end);
+				break;
+			case 'C':	/* full compatible string */
+				i = 0;
+				while (of_property_read_string_index(dn,
+						"compatible", i, &p) == 0) {
+					append_str(i == 0 ? "\"" : "\",\"",
+							pass, &len, &buf, end);
+					append_str(p, pass, &len, &buf, end);
+					i++;
+				}
+				if (i > 0)
+					append_str("\"", pass, &len, &buf, end);
+				break;
+			case 'r':	/* node reference count */
+				snprintf(tbuf, sizeof(tbuf), "%u",
+					atomic_read(&dn->kobj.kref.refcount));
+				append_str(tbuf, pass, &len, &buf, end);
+				break;
+			default:
+				break;
+			}
+		}
+	}
+	/* finish up */
+	while (buf < end && len < spec.field_width--)
+		*buf++ = ' ';
+
+	return buf;
+}
+
 int kptr_restrict __read_mostly;
 
 /*
@@ -1563,6 +1699,16 @@ int kptr_restrict __read_mostly;
  *       p page flags (see struct page) given as pointer to unsigned long
  *       g gfp flags (GFP_* and __GFP_*) given as pointer to gfp_t
  *       v vma flags (VM_*) given as pointer to unsigned long
+ * - 'O[fnpPcCFr]' For an DT device node
+ *                Without any optional arguments prints the full_name
+ *                f device node full_name
+ *                n device node name
+ *                p device node phandle
+ *                P device node path spec (name + @unit)
+ *                F device node flags
+ *                c major compatible string
+ *                C full compatible string
+ *                r node reference count
  *
  * ** Please update also Documentation/printk-formats.txt when making changes **
  *
@@ -1718,6 +1864,10 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr,
 
 	case 'G':
 		return flags_string(buf, end, ptr, fmt);
+
+	case 'O':
+		return device_node_string(buf, end, ptr, spec, fmt);
+
 	}
 	spec.flags |= SMALL;
 	if (spec.field_width == -1) {
diff --git a/net/Kconfig b/net/Kconfig
index 7b6cd34..f5cf545 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -83,6 +83,7 @@ config INET
 	  Short answer: say Y.
 
 if INET
+source "net/wireguard/Kconfig"
 source "net/ipv4/Kconfig"
 source "net/ipv6/Kconfig"
 source "net/netlabel/Kconfig"
diff --git a/net/Makefile b/net/Makefile
index 4cafaa2..9c7a1aa 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -14,6 +14,7 @@ obj-$(CONFIG_NET)		+= $(tmp-y)
 obj-$(CONFIG_LLC)		+= llc/
 obj-$(CONFIG_NET)		+= ethernet/ 802/ sched/ netlink/
 obj-$(CONFIG_NETFILTER)		+= netfilter/
+obj-$(CONFIG_WIREGUARD)		+= wireguard/
 obj-$(CONFIG_INET)		+= ipv4/
 obj-$(CONFIG_XFRM)		+= xfrm/
 obj-$(CONFIG_UNIX)		+= unix/
diff --git b/net/wireguard/Kconfig b/net/wireguard/Kconfig
new file mode 100644
index 0000000..4385995
--- /dev/null
+++ b/net/wireguard/Kconfig
@@ -0,0 +1,30 @@
+config WIREGUARD
+	tristate "IP: WireGuard secure network tunnel"
+	depends on NET && INET
+	select NET_UDP_TUNNEL
+	select CRYPTO_BLKCIPHER
+	select VFP
+	select VFPv3
+	select NEON
+	select KERNEL_MODE_NEON
+	default m
+	---help---
+	  WireGuard is a secure, fast, and easy to use replacement for IPSec
+	  that uses modern cryptography and clever networking tricks. It's
+	  designed to be fairly general purpose and abstract enough to fit most
+	  use cases, while at the same time remaining extremely simple to
+	  configure. See www.wireguard.com for more info.
+
+	  It's safe to say Y or M here, as the driver is very lightweight and
+	  is only in use when an administrator chooses to add an interface.
+
+config WIREGUARD_DEBUG
+	bool "Debugging checks and verbose messages"
+	depends on WIREGUARD
+	---help---
+	  This will write log messages for handshake and other events
+	  that occur for a WireGuard interface. It will also perform some
+	  extra validation checks and unit tests at various points. This is
+	  only useful for debugging.
+
+	  Say N here unless you know what you're doing.
diff --git b/net/wireguard/Makefile b/net/wireguard/Makefile
new file mode 100644
index 0000000..e1cedd9
--- /dev/null
+++ b/net/wireguard/Makefile
@@ -0,0 +1,19 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+
+ccflags-y := -O3 -fvisibility=hidden
+ccflags-$(CONFIG_WIREGUARD_DEBUG) += -DDEBUG -g
+ccflags-y += -Wframe-larger-than=8192
+ccflags-y += -D'pr_fmt(fmt)=KBUILD_MODNAME ": " fmt'
+wireguard-y := main.o noise.o device.o peer.o timers.o queueing.o send.o receive.o socket.o hashtables.o allowedips.o ratelimiter.o cookie.o netlink.o
+wireguard-y += crypto/curve25519.o crypto/chacha20poly1305.o crypto/blake2s.o
+
+wireguard-$(CONFIG_X86_64) += crypto/chacha20-x86_64.o crypto/poly1305-x86_64.o crypto/blake2s-x86_64.o crypto/curve25519-x86_64.o
+wireguard-$(CONFIG_ARM) += crypto/chacha20-arm.o crypto/poly1305-arm.o crypto/curve25519-arm.o
+wireguard-$(CONFIG_ARM64) += crypto/chacha20-arm64.o crypto/poly1305-arm64.o
+wireguard-$(if $(filter yy,$(CONFIG_MIPS)$(CONFIG_64BIT)),y,n) += crypto/poly1305-mips64.o
+
+include $(src)/compat/Makefile.include
+
+obj-$(if $(KBUILD_EXTMOD),m,$(CONFIG_WIREGUARD)) := wireguard.o
diff --git b/net/wireguard/allowedips.c b/net/wireguard/allowedips.c
new file mode 100644
index 0000000..a0f199f
--- /dev/null
+++ b/net/wireguard/allowedips.c
@@ -0,0 +1,326 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#include "allowedips.h"
+#include "peer.h"
+
+struct allowedips_node {
+	struct wireguard_peer *peer;
+	struct rcu_head rcu;
+	struct allowedips_node __rcu *bit[2];
+	/* While it may seem scandalous that we waste space for v4,
+	 * we're alloc'ing to the nearest power of 2 anyway, so this
+	 * doesn't actually make a difference.
+	 */
+	union {
+		__be64 v6[2];
+		__be32 v4;
+		u8 bits[16];
+	};
+	u8 cidr, bit_at_a, bit_at_b;
+};
+
+static void copy_and_assign_cidr(struct allowedips_node *node, const u8 *src, u8 cidr)
+{
+	node->cidr = cidr;
+	node->bit_at_a = cidr / 8;
+	node->bit_at_b = 7 - (cidr % 8);
+	if (cidr) {
+		memcpy(node->bits, src, (cidr + 7) / 8);
+		node->bits[(cidr + 7) / 8 - 1] &= ~0U << ((8 - (cidr % 8)) % 8);
+	}
+}
+
+#define choose_node(parent, key) parent->bit[(key[parent->bit_at_a] >> parent->bit_at_b) & 1]
+
+static void node_free_rcu(struct rcu_head *rcu)
+{
+	kfree(container_of(rcu, struct allowedips_node, rcu));
+}
+
+#define push(stack, p, len) ({ \
+	if (rcu_access_pointer(p)) { \
+		BUG_ON(len >= 128); \
+		stack[len++] = rcu_dereference_protected(p, lockdep_is_held(lock)); \
+	} \
+	true; \
+})
+static void free_root_node(struct allowedips_node __rcu *top, struct mutex *lock)
+{
+	struct allowedips_node *stack[128], *node;
+	unsigned int len;
+
+	for (len = 0, push(stack, top, len); len > 0 && (node = stack[--len]) && push(stack, node->bit[0], len) && push(stack, node->bit[1], len);)
+		call_rcu_bh(&node->rcu, node_free_rcu);
+}
+
+static int walk_by_peer(struct allowedips_node __rcu *top, int family, struct allowedips_cursor *cursor, struct wireguard_peer *peer, int (*func)(void *ctx, const u8 *ip, u8 cidr, int family), void *ctx, struct mutex *lock)
+{
+	struct allowedips_node *node;
+	int ret;
+
+	if (!rcu_access_pointer(top))
+		return 0;
+
+	if (!cursor->len)
+		push(cursor->stack, top, cursor->len);
+
+	for (; cursor->len > 0 && (node = cursor->stack[cursor->len - 1]); --cursor->len, push(cursor->stack, node->bit[0], cursor->len), push(cursor->stack, node->bit[1], cursor->len)) {
+		if (node->peer != peer)
+			continue;
+		ret = func(ctx, node->bits, node->cidr, family);
+		if (ret)
+			return ret;
+	}
+	return 0;
+}
+#undef push
+
+#define ref(p) rcu_access_pointer(p)
+#define deref(p) rcu_dereference_protected(*p, lockdep_is_held(lock))
+#define push(p) ({ BUG_ON(len >= 128); stack[len++] = p; })
+static void walk_remove_by_peer(struct allowedips_node __rcu **top, struct wireguard_peer *peer, struct mutex *lock)
+{
+	struct allowedips_node __rcu **stack[128], **nptr;
+	struct allowedips_node *node, *prev;
+	unsigned int len;
+
+	if (unlikely(!peer || !ref(*top)))
+		return;
+
+	for (prev = NULL, len = 0, push(top); len > 0; prev = node) {
+		nptr = stack[len - 1];
+		node = deref(nptr);
+		if (!node) {
+			--len;
+			continue;
+		}
+		if (!prev || ref(prev->bit[0]) == node || ref(prev->bit[1]) == node) {
+			if (ref(node->bit[0]))
+				push(&node->bit[0]);
+			else if (ref(node->bit[1]))
+				push(&node->bit[1]);
+		} else if (ref(node->bit[0]) == prev) {
+			if (ref(node->bit[1]))
+				push(&node->bit[1]);
+		} else {
+			if (node->peer == peer) {
+				node->peer = NULL;
+				if (!node->bit[0] || !node->bit[1]) {
+					rcu_assign_pointer(*nptr, deref(&node->bit[!ref(node->bit[0])]));
+					call_rcu_bh(&node->rcu, node_free_rcu);
+					node = deref(nptr);
+				}
+			}
+			--len;
+		}
+	}
+}
+#undef ref
+#undef deref
+#undef push
+
+static __always_inline unsigned int fls128(u64 a, u64 b)
+{
+	return a ? fls64(a) + 64 : fls64(b);
+}
+
+static __always_inline u8 common_bits(const struct allowedips_node *node, const u8 *key, u8 bits)
+{
+	if (bits == 32)
+		return 32 - fls(be32_to_cpu(*(const __be32 *)node->bits ^ *(const __be32 *)key));
+	else if (bits == 128)
+		return 128 - fls128(be64_to_cpu(*(const __be64 *)&node->bits[0] ^ *(const __be64 *)&key[0]), be64_to_cpu(*(const __be64 *)&node->bits[8] ^ *(const __be64 *)&key[8]));
+	return 0;
+}
+
+/* This could be much faster if it actually just compared the common bits properly,
+ * by precomputing a mask bswap(~0 << (32 - cidr)), and the rest, but it turns out that
+ * common_bits is already super fast on modern processors, even taking into account
+ * the unfortunate bswap. So, we just inline it like this instead. */
+#define prefix_matches(node, key, bits) (common_bits(node, key, bits) >= node->cidr)
+
+static __always_inline struct allowedips_node *find_node(struct allowedips_node *trie, u8 bits, const u8 *key)
+{
+	struct allowedips_node *node = trie, *found = NULL;
+
+	while (node && prefix_matches(node, key, bits)) {
+		if (node->peer)
+			found = node;
+		if (node->cidr == bits)
+			break;
+		node = rcu_dereference_bh(choose_node(node, key));
+	}
+	return found;
+}
+
+/* Returns a strong reference to a peer */
+static __always_inline struct wireguard_peer *lookup(struct allowedips_node __rcu *root, u8 bits, const void *ip)
+{
+	struct wireguard_peer *peer = NULL;
+	struct allowedips_node *node;
+
+	rcu_read_lock_bh();
+	node = find_node(rcu_dereference_bh(root), bits, ip);
+	if (node)
+		peer = peer_get(node->peer);
+	rcu_read_unlock_bh();
+	return peer;
+}
+
+static inline bool node_placement(struct allowedips_node __rcu *trie, const u8 *key, u8 cidr, u8 bits, struct allowedips_node **rnode, struct mutex *lock)
+{
+	bool exact = false;
+	struct allowedips_node *parent = NULL, *node = rcu_dereference_protected(trie, lockdep_is_held(lock));
+
+	while (node && node->cidr <= cidr && prefix_matches(node, key, bits)) {
+		parent = node;
+		if (parent->cidr == cidr) {
+			exact = true;
+			break;
+		}
+		node = rcu_dereference_protected(choose_node(parent, key), lockdep_is_held(lock));
+	}
+	*rnode = parent;
+	return exact;
+}
+
+static int add(struct allowedips_node __rcu **trie, u8 bits, const u8 *key, u8 cidr, struct wireguard_peer *peer, struct mutex *lock)
+{
+	struct allowedips_node *node, *parent, *down, *newnode;
+
+	if (unlikely(cidr > bits || !peer))
+		return -EINVAL;
+
+	if (!rcu_access_pointer(*trie)) {
+		node = kzalloc(sizeof(*node), GFP_KERNEL);
+		if (!node)
+			return -ENOMEM;
+		node->peer = peer;
+		copy_and_assign_cidr(node, key, cidr);
+		rcu_assign_pointer(*trie, node);
+		return 0;
+	}
+	if (node_placement(*trie, key, cidr, bits, &node, lock)) {
+		node->peer = peer;
+		return 0;
+	}
+
+	newnode = kzalloc(sizeof(*newnode), GFP_KERNEL);
+	if (!newnode)
+		return -ENOMEM;
+	newnode->peer = peer;
+	copy_and_assign_cidr(newnode, key, cidr);
+
+	if (!node)
+		down = rcu_dereference_protected(*trie, lockdep_is_held(lock));
+	else {
+		down = rcu_dereference_protected(choose_node(node, key), lockdep_is_held(lock));
+		if (!down) {
+			rcu_assign_pointer(choose_node(node, key), newnode);
+			return 0;
+		}
+	}
+	cidr = min(cidr, common_bits(down, key, bits));
+	parent = node;
+
+	if (newnode->cidr == cidr) {
+		rcu_assign_pointer(choose_node(newnode, down->bits), down);
+		if (!parent)
+			rcu_assign_pointer(*trie, newnode);
+		else
+			rcu_assign_pointer(choose_node(parent, newnode->bits), newnode);
+	} else {
+		node = kzalloc(sizeof(*node), GFP_KERNEL);
+		if (!node) {
+			kfree(newnode);
+			return -ENOMEM;
+		}
+		copy_and_assign_cidr(node, newnode->bits, cidr);
+
+		rcu_assign_pointer(choose_node(node, down->bits), down);
+		rcu_assign_pointer(choose_node(node, newnode->bits), newnode);
+		if (!parent)
+			rcu_assign_pointer(*trie, node);
+		else
+			rcu_assign_pointer(choose_node(parent, node->bits), node);
+	}
+	return 0;
+}
+
+void allowedips_init(struct allowedips *table)
+{
+	table->root4 = table->root6 = NULL;
+	table->seq = 1;
+}
+
+void allowedips_free(struct allowedips *table, struct mutex *lock)
+{
+	++table->seq;
+	free_root_node(table->root4, lock);
+	rcu_assign_pointer(table->root4, NULL);
+	free_root_node(table->root6, lock);
+	rcu_assign_pointer(table->root6, NULL);
+}
+
+int allowedips_insert_v4(struct allowedips *table, const struct in_addr *ip, u8 cidr, struct wireguard_peer *peer, struct mutex *lock)
+{
+	++table->seq;
+	return add(&table->root4, 32, (const u8 *)ip, cidr, peer, lock);
+}
+
+int allowedips_insert_v6(struct allowedips *table, const struct in6_addr *ip, u8 cidr, struct wireguard_peer *peer, struct mutex *lock)
+{
+	++table->seq;
+	return add(&table->root6, 128, (const u8 *)ip, cidr, peer, lock);
+}
+
+void allowedips_remove_by_peer(struct allowedips *table, struct wireguard_peer *peer, struct mutex *lock)
+{
+	++table->seq;
+	walk_remove_by_peer(&table->root4, peer, lock);
+	walk_remove_by_peer(&table->root6, peer, lock);
+}
+
+int allowedips_walk_by_peer(struct allowedips *table, struct allowedips_cursor *cursor, struct wireguard_peer *peer, int (*func)(void *ctx, const u8 *ip, u8 cidr, int family), void *ctx, struct mutex *lock)
+{
+	int ret;
+
+	if (!cursor->seq)
+		cursor->seq = table->seq;
+	else if (cursor->seq != table->seq)
+		return 0;
+
+	if (!cursor->second_half) {
+		ret = walk_by_peer(table->root4, AF_INET, cursor, peer, func, ctx, lock);
+		if (ret)
+			return ret;
+		cursor->len = 0;
+		cursor->second_half = true;
+	}
+	return walk_by_peer(table->root6, AF_INET6, cursor, peer, func, ctx, lock);
+}
+
+/* Returns a strong reference to a peer */
+struct wireguard_peer *allowedips_lookup_dst(struct allowedips *table, struct sk_buff *skb)
+{
+	if (skb->protocol == htons(ETH_P_IP))
+		return lookup(table->root4, 32, &ip_hdr(skb)->daddr);
+	else if (skb->protocol == htons(ETH_P_IPV6))
+		return lookup(table->root6, 128, &ipv6_hdr(skb)->daddr);
+	return NULL;
+}
+
+/* Returns a strong reference to a peer */
+struct wireguard_peer *allowedips_lookup_src(struct allowedips *table, struct sk_buff *skb)
+{
+	if (skb->protocol == htons(ETH_P_IP))
+		return lookup(table->root4, 32, &ip_hdr(skb)->saddr);
+	else if (skb->protocol == htons(ETH_P_IPV6))
+		return lookup(table->root6, 128, &ipv6_hdr(skb)->saddr);
+	return NULL;
+}
+
+#include "selftest/allowedips.h"
diff --git b/net/wireguard/allowedips.h b/net/wireguard/allowedips.h
new file mode 100644
index 0000000..7b43382
--- /dev/null
+++ b/net/wireguard/allowedips.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#ifndef _WG_ALLOWEDIPS_H
+#define _WG_ALLOWEDIPS_H
+
+#include <linux/mutex.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+
+struct wireguard_peer;
+struct allowedips_node;
+
+struct allowedips {
+	struct allowedips_node __rcu *root4;
+	struct allowedips_node __rcu *root6;
+	u64 seq;
+};
+
+struct allowedips_cursor {
+	u64 seq;
+	struct allowedips_node *stack[128];
+	unsigned int len;
+	bool second_half;
+};
+
+void allowedips_init(struct allowedips *table);
+void allowedips_free(struct allowedips *table, struct mutex *mutex);
+int allowedips_insert_v4(struct allowedips *table, const struct in_addr *ip, u8 cidr, struct wireguard_peer *peer, struct mutex *lock);
+int allowedips_insert_v6(struct allowedips *table, const struct in6_addr *ip, u8 cidr, struct wireguard_peer *peer, struct mutex *lock);
+void allowedips_remove_by_peer(struct allowedips *table, struct wireguard_peer *peer, struct mutex *lock);
+int allowedips_walk_by_peer(struct allowedips *table, struct allowedips_cursor *cursor, struct wireguard_peer *peer, int (*func)(void *ctx, const u8 *ip, u8 cidr, int family), void *ctx, struct mutex *lock);
+
+/* These return a strong reference to a peer: */
+struct wireguard_peer *allowedips_lookup_dst(struct allowedips *table, struct sk_buff *skb);
+struct wireguard_peer *allowedips_lookup_src(struct allowedips *table, struct sk_buff *skb);
+
+#ifdef DEBUG
+bool allowedips_selftest(void);
+#endif
+
+#endif /* _WG_ALLOWEDIPS_H */
diff --git b/net/wireguard/compat/Makefile.include b/net/wireguard/compat/Makefile.include
new file mode 100644
index 0000000..b7930b6
--- /dev/null
+++ b/net/wireguard/compat/Makefile.include
@@ -0,0 +1,66 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+
+ifeq ($(wildcard $(src)/compat/compat.h),)
+ccflags-y += -include $(srctree)/$(src)/compat/compat.h
+asflags-y += -include $(srctree)/$(src)/compat/compat-asm.h
+else
+ccflags-y += -include $(src)/compat/compat.h
+asflags-y += -include $(src)/compat/compat-asm.h
+endif
+
+ifeq ($(wildcard $(srctree)/include/linux/ptr_ring.h),)
+ccflags-y += -I$(src)/compat/ptr_ring/include
+endif
+
+ifeq ($(wildcard $(srctree)/include/linux/siphash.h),)
+ccflags-y += -I$(src)/compat/siphash/include
+wireguard-y += compat/siphash/siphash.o
+endif
+
+ifeq ($(wildcard $(srctree)/include/net/dst_cache.h),)
+ccflags-y += -I$(src)/compat/dst_cache/include
+wireguard-y += compat/dst_cache/dst_cache.o
+endif
+
+ifeq ($(wildcard $(srctree)/arch/x86/include/asm/fpu/api.h),)
+ccflags-y += -I$(src)/compat/fpu/include
+endif
+
+ifeq ($(wildcard $(srctree)/arch/x86/include/asm/simd.h),)
+ccflags-y += -I$(src)/compat/simd/include
+endif
+
+ifeq ($(wildcard $(srctree)/include/net/udp_tunnel.h),)
+ccflags-y += -I$(src)/compat/udp_tunnel/include
+wireguard-y += compat/udp_tunnel/udp_tunnel.o
+endif
+
+ifeq ($(shell grep -F "int crypto_memneq" "$(srctree)/include/crypto/algapi.h"),)
+ccflags-y += -include $(src)/compat/memneq/include.h
+wireguard-y += compat/memneq/memneq.o
+endif
+
+ifeq ($(CONFIG_X86_64),y)
+	ifeq ($(ssse3_instr),)
+		ssse3_instr := $(call as-instr,pshufb %xmm0$(comma)%xmm0,-DCONFIG_AS_SSSE3=1)
+		ccflags-y += $(ssse3_instr)
+		asflags-y += $(ssse3_instr)
+	endif
+	ifeq ($(avx_instr),)
+		avx_instr := $(call as-instr,vxorps %ymm0$(comma)%ymm1$(comma)%ymm2,-DCONFIG_AS_AVX=1)
+		ccflags-y += $(avx_instr)
+		asflags-y += $(avx_instr)
+	endif
+	ifeq ($(avx2_instr),)
+		avx2_instr := $(call as-instr,vpbroadcastb %xmm0$(comma)%ymm1,-DCONFIG_AS_AVX2=1)
+		ccflags-y += $(avx2_instr)
+		asflags-y += $(avx2_instr)
+	endif
+	ifeq ($(avx512_instr),)
+		avx512_instr := $(call as-instr,vpmovm2b %k1$(comma)%zmm5,-DCONFIG_AS_AVX512=1)
+		ccflags-y += $(avx512_instr)
+		asflags-y += $(avx512_instr)
+	endif
+endif
diff --git b/net/wireguard/compat/checksum/checksum_partial_compat.h b/net/wireguard/compat/checksum/checksum_partial_compat.h
new file mode 100644
index 0000000..115cf07
--- /dev/null
+++ b/net/wireguard/compat/checksum/checksum_partial_compat.h
@@ -0,0 +1,208 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#include <net/route.h>
+#include <net/esp.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <net/ip6_checksum.h>
+
+#define IP6_MF          0x0001
+#define IP6_OFFSET      0xFFF8
+static inline int skb_maybe_pull_tail(struct sk_buff *skb, unsigned int len, unsigned int max)
+{
+	if (skb_headlen(skb) >= len)
+		return 0;
+	if (max > skb->len)
+		max = skb->len;
+	if (__pskb_pull_tail(skb, max - skb_headlen(skb)) == NULL)
+		return -ENOMEM;
+	if (skb_headlen(skb) < len)
+		return -EPROTO;
+	return 0;
+}
+#define MAX_IP_HDR_LEN 128
+static inline int skb_checksum_setup_ip(struct sk_buff *skb, bool recalculate)
+{
+	unsigned int off;
+	bool fragment;
+	int err;
+	fragment = false;
+	err = skb_maybe_pull_tail(skb, sizeof(struct iphdr), MAX_IP_HDR_LEN);
+	if (err < 0)
+		goto out;
+	if (ip_hdr(skb)->frag_off & htons(IP_OFFSET | IP_MF))
+		fragment = true;
+	off = ip_hdrlen(skb);
+	err = -EPROTO;
+	if (fragment)
+		goto out;
+	switch (ip_hdr(skb)->protocol) {
+	case IPPROTO_TCP:
+		err = skb_maybe_pull_tail(skb,
+					  off + sizeof(struct tcphdr),
+					  MAX_IP_HDR_LEN);
+		if (err < 0)
+			goto out;
+
+		if (!skb_partial_csum_set(skb, off,
+					  offsetof(struct tcphdr, check))) {
+			err = -EPROTO;
+			goto out;
+		}
+
+		if (recalculate)
+			tcp_hdr(skb)->check =
+				~csum_tcpudp_magic(ip_hdr(skb)->saddr,
+						   ip_hdr(skb)->daddr,
+						   skb->len - off,
+						   IPPROTO_TCP, 0);
+		break;
+	case IPPROTO_UDP:
+		err = skb_maybe_pull_tail(skb,
+					  off + sizeof(struct udphdr),
+					  MAX_IP_HDR_LEN);
+		if (err < 0)
+			goto out;
+
+		if (!skb_partial_csum_set(skb, off,
+					  offsetof(struct udphdr, check))) {
+			err = -EPROTO;
+			goto out;
+		}
+
+		if (recalculate)
+			udp_hdr(skb)->check =
+				~csum_tcpudp_magic(ip_hdr(skb)->saddr,
+						   ip_hdr(skb)->daddr,
+						   skb->len - off,
+						   IPPROTO_UDP, 0);
+		break;
+	default:
+		goto out;
+	}
+	err = 0;
+out:
+	return err;
+}
+#define MAX_IPV6_HDR_LEN 256
+#define OPT_HDR(type, skb, off) \
+	(type *)(skb_network_header(skb) + (off))
+static inline int skb_checksum_setup_ipv6(struct sk_buff *skb, bool recalculate)
+{
+	int err;
+	u8 nexthdr;
+	unsigned int off;
+	unsigned int len;
+	bool fragment;
+	bool done;
+	fragment = false;
+	done = false;
+	off = sizeof(struct ipv6hdr);
+	err = skb_maybe_pull_tail(skb, off, MAX_IPV6_HDR_LEN);
+	if (err < 0)
+		goto out;
+	nexthdr = ipv6_hdr(skb)->nexthdr;
+	len = sizeof(struct ipv6hdr) + ntohs(ipv6_hdr(skb)->payload_len);
+	while (off <= len && !done) {
+		switch (nexthdr) {
+		case IPPROTO_DSTOPTS:
+		case IPPROTO_HOPOPTS:
+		case IPPROTO_ROUTING: {
+			struct ipv6_opt_hdr *hp;
+
+			err = skb_maybe_pull_tail(skb, off + sizeof(struct ipv6_opt_hdr), MAX_IPV6_HDR_LEN);
+			if (err < 0)
+				goto out;
+			hp = OPT_HDR(struct ipv6_opt_hdr, skb, off);
+			nexthdr = hp->nexthdr;
+			off += ipv6_optlen(hp);
+			break;
+		}
+		case IPPROTO_FRAGMENT: {
+			struct frag_hdr *hp;
+			err = skb_maybe_pull_tail(skb, off + sizeof(struct frag_hdr), MAX_IPV6_HDR_LEN);
+			if (err < 0)
+				goto out;
+			hp = OPT_HDR(struct frag_hdr, skb, off);
+			if (hp->frag_off & htons(IP6_OFFSET | IP6_MF))
+				fragment = true;
+			nexthdr = hp->nexthdr;
+			off += sizeof(struct frag_hdr);
+			break;
+		}
+		default:
+			done = true;
+			break;
+		}
+	}
+	err = -EPROTO;
+	if (!done || fragment)
+		goto out;
+	switch (nexthdr) {
+		case IPPROTO_TCP:
+			err = skb_maybe_pull_tail(skb,
+						  off + sizeof(struct tcphdr),
+						  MAX_IPV6_HDR_LEN);
+			if (err < 0)
+				goto out;
+
+			if (!skb_partial_csum_set(skb, off,
+						  offsetof(struct tcphdr, check))) {
+				err = -EPROTO;
+				goto out;
+			}
+
+			if (recalculate)
+				tcp_hdr(skb)->check =
+					~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
+							 &ipv6_hdr(skb)->daddr,
+							 skb->len - off,
+							 IPPROTO_TCP, 0);
+			break;
+		case IPPROTO_UDP:
+			err = skb_maybe_pull_tail(skb,
+						  off + sizeof(struct udphdr),
+						  MAX_IPV6_HDR_LEN);
+			if (err < 0)
+				goto out;
+
+			if (!skb_partial_csum_set(skb, off,
+						  offsetof(struct udphdr, check))) {
+				err = -EPROTO;
+				goto out;
+			}
+
+			if (recalculate)
+				udp_hdr(skb)->check =
+					~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
+							 &ipv6_hdr(skb)->daddr,
+							 skb->len - off,
+							 IPPROTO_UDP, 0);
+			break;
+		default:
+			goto out;
+	}
+	err = 0;
+out:
+	return err;
+}
+static inline int skb_checksum_setup(struct sk_buff *skb, bool recalculate)
+{
+	int err;
+	switch (skb->protocol) {
+	case htons(ETH_P_IP):
+		err = skb_checksum_setup_ip(skb, recalculate);
+		break;
+
+	case htons(ETH_P_IPV6):
+		err = skb_checksum_setup_ipv6(skb, recalculate);
+		break;
+	default:
+		err = -EPROTO;
+		break;
+	}
+	return err;
+}
diff --git b/net/wireguard/compat/compat-asm.h b/net/wireguard/compat/compat-asm.h
new file mode 100644
index 0000000..418c566
--- /dev/null
+++ b/net/wireguard/compat/compat-asm.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#ifndef _WG_COMPATASM_H
+#define _WG_COMPATASM_H
+
+#include <linux/linkage.h>
+#include <linux/kconfig.h>
+
+/* PaX compatibility */
+#if defined(RAP_PLUGIN)
+#undef ENTRY
+#define ENTRY RAP_ENTRY
+#endif
+
+#endif /* _WG_COMPATASM_H */
diff --git b/net/wireguard/compat/compat.h b/net/wireguard/compat/compat.h
new file mode 100644
index 0000000..f4716d1
--- /dev/null
+++ b/net/wireguard/compat/compat.h
@@ -0,0 +1,628 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#ifndef _WG_COMPAT_H
+#define _WG_COMPAT_H
+
+#include <linux/kconfig.h>
+#include <linux/version.h>
+#include <linux/types.h>
+#include <generated/utsrelease.h>
+
+#ifdef RHEL_MAJOR
+#if RHEL_MAJOR == 7
+#define ISRHEL7
+#endif
+#endif
+#ifdef UTS_UBUNTU_RELEASE_ABI
+#if LINUX_VERSION_CODE == KERNEL_VERSION(3, 13, 11)
+#define ISUBUNTU1404
+#endif
+#endif
+#ifdef CONFIG_SUSE_KERNEL
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 5, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)
+#define ISOPENSUSE42
+#endif
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 0)
+#error "WireGuard requires Linux >= 3.10"
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 18, 0) && !defined(ISRHEL7)
+#define headers_start data
+#define headers_end data
+#endif
+
+#include <linux/cache.h>
+#ifndef __ro_after_init
+#define __ro_after_init __read_mostly
+#endif
+
+#include <linux/compiler.h>
+#ifndef READ_ONCE
+#define READ_ONCE ACCESS_ONCE
+#endif
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0)
+#include "udp_tunnel/udp_tunnel_partial_compat.h"
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0) && !defined(DEBUG) && defined(net_dbg_ratelimited)
+#undef net_dbg_ratelimited
+#define net_dbg_ratelimited(fmt, ...) do { if (0) no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__); } while (0)
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0)
+#define RCU_LOCKDEP_WARN(cond, message) rcu_lockdep_assert(!(cond), message)
+#endif
+
+#if ((LINUX_VERSION_CODE > KERNEL_VERSION(3, 19, 0) && LINUX_VERSION_CODE < KERNEL_VERSION(3, 19, 6)) || \
+    (LINUX_VERSION_CODE < KERNEL_VERSION(3, 18, 12) && LINUX_VERSION_CODE > KERNEL_VERSION(3, 17, 0)) || \
+    (LINUX_VERSION_CODE < KERNEL_VERSION(3, 16, 8) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 15, 0)) || \
+    LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 40)) && !defined(ISRHEL7) && !defined(ISUBUNTU1404)
+#define dev_recursion_level() 0
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0) && !defined(ISRHEL7)
+#define ipv6_dst_lookup(a, b, c, d) ipv6_dst_lookup(b, c, d)
+#endif
+
+#if (LINUX_VERSION_CODE == KERNEL_VERSION(4, 4, 0) || \
+    (LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 5) && LINUX_VERSION_CODE >= KERNEL_VERSION(4, 2, 0)) || \
+    (LINUX_VERSION_CODE < KERNEL_VERSION(4, 1, 17) && LINUX_VERSION_CODE > KERNEL_VERSION(3, 19, 0)) || \
+    (LINUX_VERSION_CODE < KERNEL_VERSION(3, 18, 27) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0)) || \
+    (LINUX_VERSION_CODE < KERNEL_VERSION(3, 16, 8) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 15, 0)) || \
+    (LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 40) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)) || \
+    (LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 54))) && !defined(ISUBUNTU1404)
+#include <linux/if.h>
+#include <net/ip_tunnels.h>
+#define IP6_ECN_set_ce(a, b) IP6_ECN_set_ce(b)
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0)
+#define time_is_before_jiffies64(a) time_after64(get_jiffies_64(), a)
+#define time_is_after_jiffies64(a) time_before64(get_jiffies_64(), a)
+#define time_is_before_eq_jiffies64(a) time_after_eq64(get_jiffies_64(), a)
+#define time_is_after_eq_jiffies64(a) time_before_eq64(get_jiffies_64(), a)
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0) && IS_ENABLED(CONFIG_IPV6) && !defined(ISRHEL7)
+#include <net/ipv6.h>
+struct ipv6_stub_type {
+	void *udpv6_encap_enable;
+	int (*ipv6_dst_lookup)(struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6);
+};
+static const struct ipv6_stub_type ipv6_stub_impl = {
+	.udpv6_encap_enable = (void *)1,
+	.ipv6_dst_lookup = ip6_dst_lookup
+};
+static const struct ipv6_stub_type *ipv6_stub = &ipv6_stub_impl;
+#endif
+
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0) && IS_ENABLED(CONFIG_IPV6) && !defined(ISOPENSUSE42) && !defined(ISRHEL7)
+#include <net/addrconf.h>
+static inline bool ipv6_mod_enabled(void)
+{
+	return ipv6_stub->udpv6_encap_enable != NULL;
+}
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)
+#include <linux/skbuff.h>
+static inline void skb_reset_tc(struct sk_buff *skb)
+{
+#ifdef CONFIG_NET_CLS_ACT
+	skb->tc_verd = 0;
+#endif
+}
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)
+#include <linux/siphash.h>
+static inline u32 get_random_u32(void)
+{
+	static siphash_key_t key;
+	static u32 counter = 0;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0)
+	static bool has_seeded = false;
+	if (unlikely(!has_seeded)) {
+		get_random_bytes(&key, sizeof(key));
+		has_seeded = true;
+	}
+#else
+	get_random_once(&key, sizeof(key));
+#endif
+	return siphash_2u32(counter++, get_random_int(), &key);
+}
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 18, 0) && !defined(ISRHEL7)
+static inline void netif_keep_dst(struct net_device *dev)
+{
+	dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
+}
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0) && !defined(ISRHEL7)
+#include <linux/netdevice.h>
+#ifndef netdev_alloc_pcpu_stats
+#define pcpu_sw_netstats pcpu_tstats
+#endif
+#ifndef netdev_alloc_pcpu_stats
+#define netdev_alloc_pcpu_stats alloc_percpu
+#endif
+#elif LINUX_VERSION_CODE < KERNEL_VERSION(3, 15, 0) && !defined(ISRHEL7)
+#include <linux/netdevice.h>
+#ifndef netdev_alloc_pcpu_stats
+#define netdev_alloc_pcpu_stats(type)					\
+({									\
+	typeof(type) __percpu *pcpu_stats = alloc_percpu(type);		\
+	if (pcpu_stats)	{						\
+		int __cpu;						\
+		for_each_possible_cpu(__cpu) {				\
+			typeof(type) *stat;				\
+			stat = per_cpu_ptr(pcpu_stats, __cpu);		\
+			u64_stats_init(&stat->syncp);			\
+		}							\
+	}								\
+	pcpu_stats;							\
+})
+#endif
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0)
+#include "checksum/checksum_partial_compat.h"
+static inline void *our_pskb_put(struct sk_buff *skb, struct sk_buff *tail, int len)
+{
+	if (tail != skb) {
+		skb->data_len += len;
+		skb->len += len;
+	}
+	return skb_put(tail, len);
+}
+#define pskb_put our_pskb_put
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 11, 0) && !defined(ISRHEL7)
+#include <net/xfrm.h>
+static inline void skb_scrub_packet(struct sk_buff *skb, bool xnet)
+{
+#ifdef CONFIG_CAVIUM_OCTEON_IPFWD_OFFLOAD
+	memset(&skb->cvm_info, 0, sizeof(skb->cvm_info));
+	skb->cvm_reserved = 0;
+#endif
+	skb->tstamp.tv64 = 0;
+	skb->pkt_type = PACKET_HOST;
+	skb->skb_iif = 0;
+	skb_dst_drop(skb);
+	secpath_reset(skb);
+	nf_reset(skb);
+	nf_reset_trace(skb);
+	if (!xnet)
+		return;
+	skb_orphan(skb);
+	skb->mark = 0;
+}
+#endif
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0) || defined(ISUBUNTU1404)) && !defined(ISRHEL7)
+#include <linux/random.h>
+static inline u32 prandom_u32_max(u32 ep_ro)
+{
+	return (u32)(((u64) prandom_u32() * ep_ro) >> 32);
+}
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 75) && !defined(ISRHEL7)
+#ifndef U8_MAX
+#define U8_MAX ((u8)~0U)
+#endif
+#ifndef S8_MAX
+#define S8_MAX ((s8)(U8_MAX >> 1))
+#endif
+#ifndef S8_MIN
+#define S8_MIN ((s8)(-S8_MAX - 1))
+#endif
+#ifndef U16_MAX
+#define U16_MAX ((u16)~0U)
+#endif
+#ifndef S16_MAX
+#define S16_MAX ((s16)(U16_MAX >> 1))
+#endif
+#ifndef S16_MIN
+#define S16_MIN ((s16)(-S16_MAX - 1))
+#endif
+#ifndef U32_MAX
+#define U32_MAX ((u32)~0U)
+#endif
+#ifndef S32_MAX
+#define S32_MAX ((s32)(U32_MAX >> 1))
+#endif
+#ifndef S32_MIN
+#define S32_MIN ((s32)(-S32_MAX - 1))
+#endif
+#ifndef U64_MAX
+#define U64_MAX ((u64)~0ULL)
+#endif
+#ifndef S64_MAX
+#define S64_MAX ((s64)(U64_MAX >> 1))
+#endif
+#ifndef S64_MIN
+#define S64_MIN ((s64)(-S64_MAX - 1))
+#endif
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 60) && !defined(ISRHEL7)
+/* Making this static may very well invalidate its usefulness,
+ * but so it goes with compat code. */
+static inline void memzero_explicit(void *s, size_t count)
+{
+	memset(s, 0, count);
+	barrier();
+}
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0) && !defined(ISRHEL7)
+static const struct in6_addr our_in6addr_any = IN6ADDR_ANY_INIT;
+#define in6addr_any our_in6addr_any
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 13, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(4, 2, 0)
+#include <linux/completion.h>
+#include <linux/random.h>
+#include <linux/errno.h>
+struct rng_initializer {
+	struct completion done;
+	struct random_ready_callback cb;
+};
+static inline void rng_initialized_callback(struct random_ready_callback *cb)
+{
+	complete(&container_of(cb, struct rng_initializer, cb)->done);
+}
+static inline int wait_for_random_bytes(void)
+{
+	static bool rng_is_initialized = false;
+	int ret;
+	if (unlikely(!rng_is_initialized)) {
+		struct rng_initializer rng = {
+			.done = COMPLETION_INITIALIZER(rng.done),
+			.cb = { .owner = THIS_MODULE, .func = rng_initialized_callback }
+		};
+		ret = add_random_ready_callback(&rng.cb);
+		if (!ret) {
+			ret = wait_for_completion_interruptible(&rng.done);
+			if (ret) {
+				del_random_ready_callback(&rng.cb);
+				return ret;
+			}
+		} else if (ret != -EALREADY)
+			return ret;
+		rng_is_initialized = true;
+	}
+	return 0;
+}
+#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 2, 0)
+/* This is a disaster. Without this API, we really have no way of
+ * knowing if it's initialized. We just return that it has and hope
+ * for the best... */
+static inline int wait_for_random_bytes(void)
+{
+	return 0;
+}
+#endif
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 13, 0)
+static inline int get_random_bytes_wait(void *buf, int nbytes)
+{
+	int ret = wait_for_random_bytes();
+	if (unlikely(ret))
+		return ret;
+	get_random_bytes(buf, nbytes);
+	return 0;
+}
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 11, 0) && !defined(ISRHEL7)
+#define system_power_efficient_wq system_unbound_wq
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0) && !defined(ISRHEL7)
+#include <linux/ktime.h>
+static inline u64 ktime_get_ns(void)
+{
+	return ktime_to_ns(ktime_get());
+}
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0)
+#include <linux/inetdevice.h>
+static inline __be32 our_confirm_addr_indev(struct in_device *in_dev, __be32 dst,  __be32 local, int scope)
+{
+	int same = 0;
+	__be32 addr = 0;
+	for_ifa(in_dev) {
+		if (!addr && (local == ifa->ifa_local || !local) && ifa->ifa_scope <= scope) {
+			addr = ifa->ifa_local;
+			if (same)
+				break;
+		}
+		if (!same) {
+			same = (!local || inet_ifa_match(local, ifa)) && (!dst || inet_ifa_match(dst, ifa));
+			if (same && addr) {
+				if (local || !dst)
+					break;
+				if (inet_ifa_match(addr, ifa))
+					break;
+				if (ifa->ifa_scope <= scope) {
+					addr = ifa->ifa_local;
+					break;
+				}
+				same = 0;
+			}
+		}
+	} endfor_ifa(in_dev);
+	return same ? addr : 0;
+}
+static inline __be32 our_inet_confirm_addr(struct net *net, struct in_device *in_dev, __be32 dst, __be32 local, int scope)
+{
+	__be32 addr = 0;
+	struct net_device *dev;
+	if (in_dev)
+		return our_confirm_addr_indev(in_dev, dst, local, scope);
+	rcu_read_lock();
+	for_each_netdev_rcu(net, dev) {
+		in_dev = __in_dev_get_rcu(dev);
+		if (in_dev) {
+			addr = our_confirm_addr_indev(in_dev, dst, local, scope);
+			if (addr)
+				break;
+		}
+	}
+	rcu_read_unlock();
+	return addr;
+}
+#define inet_confirm_addr our_inet_confirm_addr
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 12, 0)
+#include <linux/vmalloc.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+static inline void *kvmalloc_ours(size_t size, gfp_t flags)
+{
+	gfp_t kmalloc_flags = flags;
+	void *ret;
+	if (size > PAGE_SIZE) {
+		kmalloc_flags |= __GFP_NOWARN;
+		if (!(kmalloc_flags & __GFP_REPEAT) || (size <= PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER))
+			kmalloc_flags |= __GFP_NORETRY;
+	}
+	ret = kmalloc(size, kmalloc_flags);
+	if (ret || size <= PAGE_SIZE)
+		return ret;
+	return __vmalloc(size, flags, PAGE_KERNEL);
+}
+static inline void *kvzalloc_ours(size_t size, gfp_t flags)
+{
+	return kvmalloc_ours(size, flags | __GFP_ZERO);
+}
+#define kvmalloc kvmalloc_ours
+#define kvzalloc kvzalloc_ours
+#endif
+
+#if ((LINUX_VERSION_CODE < KERNEL_VERSION(3, 15, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)) || LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 41)) && !defined(ISUBUNTU1404)
+#include <linux/vmalloc.h>
+#include <linux/mm.h>
+static inline void kvfree_ours(const void *addr)
+{
+	if (is_vmalloc_addr(addr))
+		vfree(addr);
+	else
+		kfree(addr);
+}
+#define kvfree kvfree_ours
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 9)
+#include <linux/netdevice.h>
+#define priv_destructor destructor
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 13, 0)
+#define newlink(a,b,c,d,e) newlink(a,b,c,d)
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 12, 0)
+#include <net/netlink.h>
+#include <net/genetlink.h>
+#define nlmsg_parse(a, b, c, d, e, f) nlmsg_parse(a, b, c, d, e)
+#define nla_parse_nested(a, b, c, d, e) nla_parse_nested(a, b, c, d)
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)
+static inline struct nlattr **genl_family_attrbuf(const struct genl_family *family)
+{
+	return family->attrbuf;
+}
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0)
+#define PTR_ERR_OR_ZERO(p) PTR_RET(p)
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 7, 0)
+#include <net/netlink.h>
+#define nla_put_u64_64bit(a, b, c, d) nla_put_u64(a, b, c)
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)
+#include <net/genetlink.h>
+#ifndef GENL_UNS_ADMIN_PERM
+#define GENL_UNS_ADMIN_PERM GENL_ADMIN_PERM
+#endif
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)
+#include <net/genetlink.h>
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 13, 0) && !defined(ISRHEL7)
+#define genl_register_family(a) genl_register_family_with_ops(a, (struct genl_ops *)genl_ops, ARRAY_SIZE(genl_ops))
+#else
+#define genl_register_family(a) genl_register_family_with_ops(a, genl_ops)
+#endif
+#define COMPAT_CANNOT_USE_GENL_NOPS
+#endif
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 14, 2) && LINUX_VERSION_CODE >= KERNEL_VERSION(4, 14, 0)) || (LINUX_VERSION_CODE < KERNEL_VERSION(4, 13, 16) && LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)) || (LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 65) && LINUX_VERSION_CODE >= KERNEL_VERSION(4, 5, 0)) || (LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 101) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0)) || LINUX_VERSION_CODE < KERNEL_VERSION(3, 18, 84)
+#define ___COMPAT_NETLINK_DUMP_BLOCK { int ret; skb->end -= nlmsg_total_size(sizeof(int)); ret = get_device_dump_real(skb, cb); skb->end += nlmsg_total_size(sizeof(int)); return ret; }
+#define ___COMPAT_NETLINK_DUMP_OVERRIDE
+#else
+#define ___COMPAT_NETLINK_DUMP_BLOCK return get_device_dump_real(skb, cb);
+#endif
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 13, 14) && LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)) || LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 63)
+#define get_device_dump(a, b) get_device_dump_real(a, b); \
+static int get_device_dump(a, b) { \
+	struct wireguard_device *wg = (struct wireguard_device *)cb->args[0]; \
+	if (!wg) { \
+		int ret = get_device_start(cb); \
+		if (ret) \
+			return ret; \
+	} \
+	___COMPAT_NETLINK_DUMP_BLOCK \
+} \
+static int get_device_dump_real(a, b)
+#define COMPAT_CANNOT_USE_NETLINK_START
+#elif defined(___COMPAT_NETLINK_DUMP_OVERRIDE)
+#define get_device_dump(a, b) get_device_dump_real(a, b); \
+static int get_device_dump(a, b) { \
+	___COMPAT_NETLINK_DUMP_BLOCK \
+} \
+static int get_device_dump_real(a, b)
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
+#define COMPAT_CANNOT_USE_IN6_DEV_GET
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)
+#define COMPAT_CANNOT_USE_DEV_CNF
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0)
+#define COMPAT_CANNOT_USE_IFF_NO_QUEUE
+#endif
+
+#if defined(CONFIG_X86_64) && LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0)
+#include <asm/user.h>
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 2, 0)
+#include <asm/xsave.h>
+#include <asm/xcr.h>
+static inline int cpu_has_xfeatures(u64 xfeatures_needed, const char **feature_name)
+{
+	return xgetbv(XCR_XFEATURE_ENABLED_MASK) & xfeatures_needed;
+}
+#endif
+#ifndef XFEATURE_MASK_YMM
+#define XFEATURE_MASK_YMM XSTATE_YMM
+#endif
+#ifndef XFEATURE_MASK_SSE
+#define XFEATURE_MASK_SSE XSTATE_SSE
+#endif
+#ifndef XFEATURE_MASK_ZMM_Hi256
+#define XFEATURE_MASK_ZMM_Hi256 XSTATE_ZMM_Hi256
+#endif
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) && defined(CONFIG_X86_64)
+/* This is incredibly dumb and reckless, but as it turns out, there's
+ * not really hardware Linux runs properly on that supports F but not BW
+ * and VL, so in practice this isn't so bad. Plus, this is compat layer,
+ * so the bar remains fairly low.
+ */
+#include <asm/cpufeature.h>
+#ifndef X86_FEATURE_AVX512BW
+#define X86_FEATURE_AVX512BW X86_FEATURE_AVX512F
+#endif
+#ifndef X86_FEATURE_AVX512VL
+#define X86_FEATURE_AVX512VL X86_FEATURE_AVX512F
+#endif
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 11, 0)
+struct _____dummy_container { char dev; };
+#define netdev_notifier_info net_device *)data); __attribute((unused)) char _____dummy = ((struct _____dummy_container
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 14, 0)
+#define timer_setup(a, b, c) setup_timer(a, ((void (*)(unsigned long))b), ((unsigned long)a))
+#define from_timer(var, callback_timer, timer_fieldname) container_of(callback_timer, typeof(*var), timer_fieldname)
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 3)
+#define COMPAT_CANNOT_USE_AVX512
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
+#define timespec64 timespec
+#define getnstimeofday64 getnstimeofday
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 15, 0)
+#include <net/genetlink.h>
+#define genl_dump_check_consistent(a, b) genl_dump_check_consistent(a, b, &genl_family)
+#endif
+
+/* https://lkml.org/lkml/2017/6/23/790 */
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+#include <linux/ip.h>
+#include <linux/icmpv6.h>
+#include <net/ipv6.h>
+#include <net/icmp.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_nat_core.h>
+static inline void new_icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
+{
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn *ct = nf_ct_get(skb_in, &ctinfo);
+	if (skb_network_header(skb_in) < skb_in->head || (skb_network_header(skb_in) + sizeof(struct iphdr)) > skb_tail_pointer(skb_in))
+		return;
+	if (ct)
+		ip_hdr(skb_in)->saddr = ct->tuplehash[0].tuple.src.u3.ip;
+	icmp_send(skb_in, type, code, info);
+}
+static inline void new_icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
+{
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+	if (skb_network_header(skb) < skb->head || (skb_network_header(skb) + sizeof(struct ipv6hdr)) > skb_tail_pointer(skb))
+		return;
+	if (ct)
+		ipv6_hdr(skb)->saddr = ct->tuplehash[0].tuple.src.u3.in6;
+	icmpv6_send(skb, type, code, info);
+}
+#define icmp_send(a,b,c,d) new_icmp_send(a,b,c,d)
+#define icmpv6_send(a,b,c,d) new_icmpv6_send(a,b,c,d)
+#endif
+
+/* PaX compatibility */
+#ifdef CONSTIFY_PLUGIN
+#include <linux/cache.h>
+#undef __read_mostly
+#define __read_mostly
+#endif
+#if defined(RAP_PLUGIN) && LINUX_VERSION_CODE < KERNEL_VERSION(4, 15, 0)
+#include <linux/timer.h>
+#define expired_retransmit_handshake(a) expired_retransmit_handshake(unsigned long timer)
+#define expired_send_keepalive(a) expired_send_keepalive(unsigned long timer)
+#define expired_new_handshake(a) expired_new_handshake(unsigned long timer)
+#define expired_zero_key_material(a) expired_zero_key_material(unsigned long timer)
+#define expired_send_persistent_keepalive(a) expired_send_persistent_keepalive(unsigned long timer)
+#undef timer_setup
+#define timer_setup(a, b, c) setup_timer(a, ((void (*)(unsigned long))b), ((unsigned long)a))
+#undef from_timer
+#define from_timer(var, callback_timer, timer_fieldname) container_of((struct timer_list *)callback_timer, typeof(*var), timer_fieldname)
+#endif
+
+#endif /* _WG_COMPAT_H */
diff --git b/net/wireguard/compat/dst_cache/dst_cache.c b/net/wireguard/compat/dst_cache/dst_cache.c
new file mode 100644
index 0000000..7ec22f7
--- /dev/null
+++ b/net/wireguard/compat/dst_cache/dst_cache.c
@@ -0,0 +1,175 @@
+/*
+ * net/core/dst_cache.c - dst entry cache
+ *
+ * Copyright (c) 2016 Paolo Abeni <pabeni@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/percpu.h>
+#include <net/dst_cache.h>
+#include <net/route.h>
+#if IS_ENABLED(CONFIG_IPV6)
+#include <net/ip6_fib.h>
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 2, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0)) || LINUX_VERSION_CODE < KERNEL_VERSION(3, 16, 50)
+static inline u32 rt6_get_cookie(const struct rt6_info *rt)
+{
+	if ((unlikely(rt->dst.flags & DST_NOCACHE) && rt->dst.from))
+		rt = (struct rt6_info *)(rt->dst.from);
+
+	return rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0;
+}
+#endif
+#endif
+#include <uapi/linux/in.h>
+
+struct dst_cache_pcpu {
+	unsigned long refresh_ts;
+	struct dst_entry *dst;
+	u32 cookie;
+	union {
+		struct in_addr in_saddr;
+		struct in6_addr in6_saddr;
+	};
+};
+
+static void dst_cache_per_cpu_dst_set(struct dst_cache_pcpu *dst_cache,
+				      struct dst_entry *dst, u32 cookie)
+{
+	dst_release(dst_cache->dst);
+	if (dst)
+		dst_hold(dst);
+
+	dst_cache->cookie = cookie;
+	dst_cache->dst = dst;
+}
+
+static struct dst_entry *dst_cache_per_cpu_get(struct dst_cache *dst_cache,
+					       struct dst_cache_pcpu *idst)
+{
+	struct dst_entry *dst;
+
+	dst = idst->dst;
+	if (!dst)
+		goto fail;
+
+	/* the cache already hold a dst reference; it can't go away */
+	dst_hold(dst);
+
+	if (unlikely(!time_after(idst->refresh_ts, dst_cache->reset_ts) ||
+		     (dst->obsolete && !dst->ops->check(dst, idst->cookie)))) {
+		dst_cache_per_cpu_dst_set(idst, NULL, 0);
+		dst_release(dst);
+		goto fail;
+	}
+	return dst;
+
+fail:
+	idst->refresh_ts = jiffies;
+	return NULL;
+}
+
+struct dst_entry *dst_cache_get(struct dst_cache *dst_cache)
+{
+	if (!dst_cache->cache)
+		return NULL;
+
+	return dst_cache_per_cpu_get(dst_cache, this_cpu_ptr(dst_cache->cache));
+}
+
+struct rtable *dst_cache_get_ip4(struct dst_cache *dst_cache, __be32 *saddr)
+{
+	struct dst_cache_pcpu *idst;
+	struct dst_entry *dst;
+
+	if (!dst_cache->cache)
+		return NULL;
+
+	idst = this_cpu_ptr(dst_cache->cache);
+	dst = dst_cache_per_cpu_get(dst_cache, idst);
+	if (!dst)
+		return NULL;
+
+	*saddr = idst->in_saddr.s_addr;
+	return container_of(dst, struct rtable, dst);
+}
+
+void dst_cache_set_ip4(struct dst_cache *dst_cache, struct dst_entry *dst,
+		       __be32 saddr)
+{
+	struct dst_cache_pcpu *idst;
+
+	if (!dst_cache->cache)
+		return;
+
+	idst = this_cpu_ptr(dst_cache->cache);
+	dst_cache_per_cpu_dst_set(idst, dst, 0);
+	idst->in_saddr.s_addr = saddr;
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+void dst_cache_set_ip6(struct dst_cache *dst_cache, struct dst_entry *dst,
+		       const struct in6_addr *addr)
+{
+	struct dst_cache_pcpu *idst;
+
+	if (!dst_cache->cache)
+		return;
+
+	idst = this_cpu_ptr(dst_cache->cache);
+	dst_cache_per_cpu_dst_set(this_cpu_ptr(dst_cache->cache), dst,
+				  rt6_get_cookie((struct rt6_info *)dst));
+	idst->in6_saddr = *addr;
+}
+
+struct dst_entry *dst_cache_get_ip6(struct dst_cache *dst_cache,
+				    struct in6_addr *saddr)
+{
+	struct dst_cache_pcpu *idst;
+	struct dst_entry *dst;
+
+	if (!dst_cache->cache)
+		return NULL;
+
+	idst = this_cpu_ptr(dst_cache->cache);
+	dst = dst_cache_per_cpu_get(dst_cache, idst);
+	if (!dst)
+		return NULL;
+
+	*saddr = idst->in6_saddr;
+	return dst;
+}
+#endif
+
+int dst_cache_init(struct dst_cache *dst_cache, gfp_t gfp)
+{
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 18, 0)
+	BUG_ON(gfp & GFP_ATOMIC);
+	dst_cache->cache = alloc_percpu(struct dst_cache_pcpu);
+#else
+	dst_cache->cache = alloc_percpu_gfp(struct dst_cache_pcpu,
+					    gfp | __GFP_ZERO);
+#endif
+	if (!dst_cache->cache)
+		return -ENOMEM;
+
+	dst_cache_reset(dst_cache);
+	return 0;
+}
+
+void dst_cache_destroy(struct dst_cache *dst_cache)
+{
+	int i;
+
+	if (!dst_cache->cache)
+		return;
+
+	for_each_possible_cpu(i)
+		dst_release(per_cpu_ptr(dst_cache->cache, i)->dst);
+
+	free_percpu(dst_cache->cache);
+}
diff --git b/net/wireguard/compat/dst_cache/include/net/dst_cache.h b/net/wireguard/compat/dst_cache/include/net/dst_cache.h
new file mode 100644
index 0000000..5a083c7
--- /dev/null
+++ b/net/wireguard/compat/dst_cache/include/net/dst_cache.h
@@ -0,0 +1,97 @@
+#ifndef _WG_NET_DST_CACHE_H
+#define _WG_NET_DST_CACHE_H
+
+#include <linux/jiffies.h>
+#include <net/dst.h>
+#if IS_ENABLED(CONFIG_IPV6)
+#include <net/ip6_fib.h>
+#endif
+
+struct dst_cache {
+	struct dst_cache_pcpu __percpu *cache;
+	unsigned long reset_ts;
+};
+
+/**
+ *	dst_cache_get - perform cache lookup
+ *	@dst_cache: the cache
+ *
+ *	The caller should use dst_cache_get_ip4() if it need to retrieve the
+ *	source address to be used when xmitting to the cached dst.
+ *	local BH must be disabled.
+ */
+struct dst_entry *dst_cache_get(struct dst_cache *dst_cache);
+
+/**
+ *	dst_cache_get_ip4 - perform cache lookup and fetch ipv4 source address
+ *	@dst_cache: the cache
+ *	@saddr: return value for the retrieved source address
+ *
+ *	local BH must be disabled.
+ */
+struct rtable *dst_cache_get_ip4(struct dst_cache *dst_cache, __be32 *saddr);
+
+/**
+ *	dst_cache_set_ip4 - store the ipv4 dst into the cache
+ *	@dst_cache: the cache
+ *	@dst: the entry to be cached
+ *	@saddr: the source address to be stored inside the cache
+ *
+ *	local BH must be disabled.
+ */
+void dst_cache_set_ip4(struct dst_cache *dst_cache, struct dst_entry *dst,
+		       __be32 saddr);
+
+#if IS_ENABLED(CONFIG_IPV6)
+
+/**
+ *	dst_cache_set_ip6 - store the ipv6 dst into the cache
+ *	@dst_cache: the cache
+ *	@dst: the entry to be cached
+ *	@saddr: the source address to be stored inside the cache
+ *
+ *	local BH must be disabled.
+ */
+void dst_cache_set_ip6(struct dst_cache *dst_cache, struct dst_entry *dst,
+		       const struct in6_addr *addr);
+
+/**
+ *	dst_cache_get_ip6 - perform cache lookup and fetch ipv6 source address
+ *	@dst_cache: the cache
+ *	@saddr: return value for the retrieved source address
+ *
+ *	local BH must be disabled.
+ */
+struct dst_entry *dst_cache_get_ip6(struct dst_cache *dst_cache,
+				    struct in6_addr *saddr);
+#endif
+
+/**
+ *	dst_cache_reset - invalidate the cache contents
+ *	@dst_cache: the cache
+ *
+ *	This do not free the cached dst to avoid races and contentions.
+ *	the dst will be freed on later cache lookup.
+ */
+static inline void dst_cache_reset(struct dst_cache *dst_cache)
+{
+	dst_cache->reset_ts = jiffies;
+}
+
+/**
+ *	dst_cache_init - initialize the cache, allocating the required storage
+ *	@dst_cache: the cache
+ *	@gfp: allocation flags
+ */
+int dst_cache_init(struct dst_cache *dst_cache, gfp_t gfp);
+
+/**
+ *	dst_cache_destroy - empty the cache and free the allocated storage
+ *	@dst_cache: the cache
+ *
+ *	No synchronization is enforced: it must be called only when the cache
+ *	is unsed.
+ */
+void dst_cache_destroy(struct dst_cache *dst_cache);
+
+#endif /* _WG_NET_DST_CACHE_H */
diff --git b/net/wireguard/compat/fpu/include/asm/fpu/api.h b/net/wireguard/compat/fpu/include/asm/fpu/api.h
new file mode 100644
index 0000000..f3f9117
--- /dev/null
+++ b/net/wireguard/compat/fpu/include/asm/fpu/api.h
@@ -0,0 +1 @@
+#include <asm/i387.h>
diff --git b/net/wireguard/compat/memneq/include.h b/net/wireguard/compat/memneq/include.h
new file mode 100644
index 0000000..2d18acd
--- /dev/null
+++ b/net/wireguard/compat/memneq/include.h
@@ -0,0 +1,5 @@
+extern noinline unsigned long __crypto_memneq(const void *a, const void *b, size_t size);
+static inline int crypto_memneq(const void *a, const void *b, size_t size)
+{
+	return __crypto_memneq(a, b, size) != 0UL ? 1 : 0;
+}
diff --git b/net/wireguard/compat/memneq/memneq.c b/net/wireguard/compat/memneq/memneq.c
new file mode 100644
index 0000000..0bcb4b8
--- /dev/null
+++ b/net/wireguard/compat/memneq/memneq.c
@@ -0,0 +1,170 @@
+/*
+ * Constant-time equality testing of memory regions.
+ *
+ * Authors:
+ *
+ *   James Yonan <james@openvpn.net>
+ *   Daniel Borkmann <dborkman@redhat.com>
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2013 OpenVPN Technologies, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ * The full GNU General Public License is included in this distribution
+ * in the file called LICENSE.GPL.
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2013 OpenVPN Technologies, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *   * Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in
+ *     the documentation and/or other materials provided with the
+ *     distribution.
+ *   * Neither the name of OpenVPN Technologies nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <crypto/algapi.h>
+
+/* Make the optimizer believe the variable can be manipulated arbitrarily. */
+#define COMPILER_OPTIMIZER_HIDE_VAR(var) __asm__ ("" : "=r" (var) : "0" (var))
+
+#ifndef __HAVE_ARCH_CRYPTO_MEMNEQ
+
+/* Generic path for arbitrary size */
+static inline unsigned long
+__crypto_memneq_generic(const void *a, const void *b, size_t size)
+{
+	unsigned long neq = 0;
+
+#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
+	while (size >= sizeof(unsigned long)) {
+		neq |= *(unsigned long *)a ^ *(unsigned long *)b;
+		COMPILER_OPTIMIZER_HIDE_VAR(neq);
+		a += sizeof(unsigned long);
+		b += sizeof(unsigned long);
+		size -= sizeof(unsigned long);
+	}
+#endif /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */
+	while (size > 0) {
+		neq |= *(unsigned char *)a ^ *(unsigned char *)b;
+		COMPILER_OPTIMIZER_HIDE_VAR(neq);
+		a += 1;
+		b += 1;
+		size -= 1;
+	}
+	return neq;
+}
+
+/* Loop-free fast-path for frequently used 16-byte size */
+static inline unsigned long __crypto_memneq_16(const void *a, const void *b)
+{
+	unsigned long neq = 0;
+
+#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+	if (sizeof(unsigned long) == 8) {
+		neq |= *(unsigned long *)(a)   ^ *(unsigned long *)(b);
+		COMPILER_OPTIMIZER_HIDE_VAR(neq);
+		neq |= *(unsigned long *)(a+8) ^ *(unsigned long *)(b+8);
+		COMPILER_OPTIMIZER_HIDE_VAR(neq);
+	} else if (sizeof(unsigned int) == 4) {
+		neq |= *(unsigned int *)(a)    ^ *(unsigned int *)(b);
+		COMPILER_OPTIMIZER_HIDE_VAR(neq);
+		neq |= *(unsigned int *)(a+4)  ^ *(unsigned int *)(b+4);
+		COMPILER_OPTIMIZER_HIDE_VAR(neq);
+		neq |= *(unsigned int *)(a+8)  ^ *(unsigned int *)(b+8);
+		COMPILER_OPTIMIZER_HIDE_VAR(neq);
+		neq |= *(unsigned int *)(a+12) ^ *(unsigned int *)(b+12);
+		COMPILER_OPTIMIZER_HIDE_VAR(neq);
+	} else
+#endif /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */
+	{
+		neq |= *(unsigned char *)(a)    ^ *(unsigned char *)(b);
+		COMPILER_OPTIMIZER_HIDE_VAR(neq);
+		neq |= *(unsigned char *)(a+1)  ^ *(unsigned char *)(b+1);
+		COMPILER_OPTIMIZER_HIDE_VAR(neq);
+		neq |= *(unsigned char *)(a+2)  ^ *(unsigned char *)(b+2);
+		COMPILER_OPTIMIZER_HIDE_VAR(neq);
+		neq |= *(unsigned char *)(a+3)  ^ *(unsigned char *)(b+3);
+		COMPILER_OPTIMIZER_HIDE_VAR(neq);
+		neq |= *(unsigned char *)(a+4)  ^ *(unsigned char *)(b+4);
+		COMPILER_OPTIMIZER_HIDE_VAR(neq);
+		neq |= *(unsigned char *)(a+5)  ^ *(unsigned char *)(b+5);
+		COMPILER_OPTIMIZER_HIDE_VAR(neq);
+		neq |= *(unsigned char *)(a+6)  ^ *(unsigned char *)(b+6);
+		COMPILER_OPTIMIZER_HIDE_VAR(neq);
+		neq |= *(unsigned char *)(a+7)  ^ *(unsigned char *)(b+7);
+		COMPILER_OPTIMIZER_HIDE_VAR(neq);
+		neq |= *(unsigned char *)(a+8)  ^ *(unsigned char *)(b+8);
+		COMPILER_OPTIMIZER_HIDE_VAR(neq);
+		neq |= *(unsigned char *)(a+9)  ^ *(unsigned char *)(b+9);
+		COMPILER_OPTIMIZER_HIDE_VAR(neq);
+		neq |= *(unsigned char *)(a+10) ^ *(unsigned char *)(b+10);
+		COMPILER_OPTIMIZER_HIDE_VAR(neq);
+		neq |= *(unsigned char *)(a+11) ^ *(unsigned char *)(b+11);
+		COMPILER_OPTIMIZER_HIDE_VAR(neq);
+		neq |= *(unsigned char *)(a+12) ^ *(unsigned char *)(b+12);
+		COMPILER_OPTIMIZER_HIDE_VAR(neq);
+		neq |= *(unsigned char *)(a+13) ^ *(unsigned char *)(b+13);
+		COMPILER_OPTIMIZER_HIDE_VAR(neq);
+		neq |= *(unsigned char *)(a+14) ^ *(unsigned char *)(b+14);
+		COMPILER_OPTIMIZER_HIDE_VAR(neq);
+		neq |= *(unsigned char *)(a+15) ^ *(unsigned char *)(b+15);
+		COMPILER_OPTIMIZER_HIDE_VAR(neq);
+	}
+
+	return neq;
+}
+
+/* Compare two areas of memory without leaking timing information,
+ * and with special optimizations for common sizes.  Users should
+ * not call this function directly, but should instead use
+ * crypto_memneq defined in crypto/algapi.h.
+ */
+noinline unsigned long __crypto_memneq(const void *a, const void *b,
+				       size_t size)
+{
+	switch (size) {
+	case 16:
+		return __crypto_memneq_16(a, b);
+	default:
+		return __crypto_memneq_generic(a, b, size);
+	}
+}
+
+#endif /* __HAVE_ARCH_CRYPTO_MEMNEQ */
diff --git b/net/wireguard/compat/ptr_ring/include/linux/ptr_ring.h b/net/wireguard/compat/ptr_ring/include/linux/ptr_ring.h
new file mode 100644
index 0000000..37b4bb2
--- /dev/null
+++ b/net/wireguard/compat/ptr_ring/include/linux/ptr_ring.h
@@ -0,0 +1,640 @@
+/*
+ *	Definitions for the 'struct ptr_ring' datastructure.
+ *
+ *	Author:
+ *		Michael S. Tsirkin <mst@redhat.com>
+ *
+ *	Copyright (C) 2016 Red Hat, Inc.
+ *
+ *	This program is free software; you can redistribute it and/or modify it
+ *	under the terms of the GNU General Public License as published by the
+ *	Free Software Foundation; either version 2 of the License, or (at your
+ *	option) any later version.
+ *
+ *	This is a limited-size FIFO maintaining pointers in FIFO order, with
+ *	one CPU producing entries and another consuming entries from a FIFO.
+ *
+ *	This implementation tries to minimize cache-contention when there is a
+ *	single producer and a single consumer CPU.
+ */
+
+#ifndef _LINUX_PTR_RING_H
+#define _LINUX_PTR_RING_H 1
+
+#ifdef __KERNEL__
+#include <linux/spinlock.h>
+#include <linux/cache.h>
+#include <linux/types.h>
+#include <linux/compiler.h>
+#include <linux/cache.h>
+#include <linux/slab.h>
+#include <asm/errno.h>
+#endif
+
+struct ptr_ring {
+	int producer ____cacheline_aligned_in_smp;
+	spinlock_t producer_lock;
+	int consumer_head ____cacheline_aligned_in_smp; /* next valid entry */
+	int consumer_tail; /* next entry to invalidate */
+	spinlock_t consumer_lock;
+	/* Shared consumer/producer data */
+	/* Read-only by both the producer and the consumer */
+	int size ____cacheline_aligned_in_smp; /* max entries in queue */
+	int batch; /* number of entries to consume in a batch */
+	void **queue;
+};
+
+/* Note: callers invoking this in a loop must use a compiler barrier,
+ * for example cpu_relax().  If ring is ever resized, callers must hold
+ * producer_lock - see e.g. ptr_ring_full.  Otherwise, if callers don't hold
+ * producer_lock, the next call to __ptr_ring_produce may fail.
+ */
+static inline bool __ptr_ring_full(struct ptr_ring *r)
+{
+	return r->queue[r->producer];
+}
+
+static inline bool ptr_ring_full(struct ptr_ring *r)
+{
+	bool ret;
+
+	spin_lock(&r->producer_lock);
+	ret = __ptr_ring_full(r);
+	spin_unlock(&r->producer_lock);
+
+	return ret;
+}
+
+static inline bool ptr_ring_full_irq(struct ptr_ring *r)
+{
+	bool ret;
+
+	spin_lock_irq(&r->producer_lock);
+	ret = __ptr_ring_full(r);
+	spin_unlock_irq(&r->producer_lock);
+
+	return ret;
+}
+
+static inline bool ptr_ring_full_any(struct ptr_ring *r)
+{
+	unsigned long flags;
+	bool ret;
+
+	spin_lock_irqsave(&r->producer_lock, flags);
+	ret = __ptr_ring_full(r);
+	spin_unlock_irqrestore(&r->producer_lock, flags);
+
+	return ret;
+}
+
+static inline bool ptr_ring_full_bh(struct ptr_ring *r)
+{
+	bool ret;
+
+	spin_lock_bh(&r->producer_lock);
+	ret = __ptr_ring_full(r);
+	spin_unlock_bh(&r->producer_lock);
+
+	return ret;
+}
+
+/* Note: callers invoking this in a loop must use a compiler barrier,
+ * for example cpu_relax(). Callers must hold producer_lock.
+ */
+static inline int __ptr_ring_produce(struct ptr_ring *r, void *ptr)
+{
+	if (unlikely(!r->size) || r->queue[r->producer])
+		return -ENOSPC;
+
+	r->queue[r->producer++] = ptr;
+	if (unlikely(r->producer >= r->size))
+		r->producer = 0;
+	return 0;
+}
+
+/*
+ * Note: resize (below) nests producer lock within consumer lock, so if you
+ * consume in interrupt or BH context, you must disable interrupts/BH when
+ * calling this.
+ */
+static inline int ptr_ring_produce(struct ptr_ring *r, void *ptr)
+{
+	int ret;
+
+	spin_lock(&r->producer_lock);
+	ret = __ptr_ring_produce(r, ptr);
+	spin_unlock(&r->producer_lock);
+
+	return ret;
+}
+
+static inline int ptr_ring_produce_irq(struct ptr_ring *r, void *ptr)
+{
+	int ret;
+
+	spin_lock_irq(&r->producer_lock);
+	ret = __ptr_ring_produce(r, ptr);
+	spin_unlock_irq(&r->producer_lock);
+
+	return ret;
+}
+
+static inline int ptr_ring_produce_any(struct ptr_ring *r, void *ptr)
+{
+	unsigned long flags;
+	int ret;
+
+	spin_lock_irqsave(&r->producer_lock, flags);
+	ret = __ptr_ring_produce(r, ptr);
+	spin_unlock_irqrestore(&r->producer_lock, flags);
+
+	return ret;
+}
+
+static inline int ptr_ring_produce_bh(struct ptr_ring *r, void *ptr)
+{
+	int ret;
+
+	spin_lock_bh(&r->producer_lock);
+	ret = __ptr_ring_produce(r, ptr);
+	spin_unlock_bh(&r->producer_lock);
+
+	return ret;
+}
+
+/* Note: callers invoking this in a loop must use a compiler barrier,
+ * for example cpu_relax(). Callers must take consumer_lock
+ * if they dereference the pointer - see e.g. PTR_RING_PEEK_CALL.
+ * If ring is never resized, and if the pointer is merely
+ * tested, there's no need to take the lock - see e.g.  __ptr_ring_empty.
+ */
+static inline void *__ptr_ring_peek(struct ptr_ring *r)
+{
+	if (likely(r->size))
+		return r->queue[r->consumer_head];
+	return NULL;
+}
+
+/* Note: callers invoking this in a loop must use a compiler barrier,
+ * for example cpu_relax(). Callers must take consumer_lock
+ * if the ring is ever resized - see e.g. ptr_ring_empty.
+ */
+static inline bool __ptr_ring_empty(struct ptr_ring *r)
+{
+	return !__ptr_ring_peek(r);
+}
+
+static inline bool ptr_ring_empty(struct ptr_ring *r)
+{
+	bool ret;
+
+	spin_lock(&r->consumer_lock);
+	ret = __ptr_ring_empty(r);
+	spin_unlock(&r->consumer_lock);
+
+	return ret;
+}
+
+static inline bool ptr_ring_empty_irq(struct ptr_ring *r)
+{
+	bool ret;
+
+	spin_lock_irq(&r->consumer_lock);
+	ret = __ptr_ring_empty(r);
+	spin_unlock_irq(&r->consumer_lock);
+
+	return ret;
+}
+
+static inline bool ptr_ring_empty_any(struct ptr_ring *r)
+{
+	unsigned long flags;
+	bool ret;
+
+	spin_lock_irqsave(&r->consumer_lock, flags);
+	ret = __ptr_ring_empty(r);
+	spin_unlock_irqrestore(&r->consumer_lock, flags);
+
+	return ret;
+}
+
+static inline bool ptr_ring_empty_bh(struct ptr_ring *r)
+{
+	bool ret;
+
+	spin_lock_bh(&r->consumer_lock);
+	ret = __ptr_ring_empty(r);
+	spin_unlock_bh(&r->consumer_lock);
+
+	return ret;
+}
+
+/* Must only be called after __ptr_ring_peek returned !NULL */
+static inline void __ptr_ring_discard_one(struct ptr_ring *r)
+{
+	/* Fundamentally, what we want to do is update consumer
+	 * index and zero out the entry so producer can reuse it.
+	 * Doing it naively at each consume would be as simple as:
+	 *       r->queue[r->consumer++] = NULL;
+	 *       if (unlikely(r->consumer >= r->size))
+	 *               r->consumer = 0;
+	 * but that is suboptimal when the ring is full as producer is writing
+	 * out new entries in the same cache line.  Defer these updates until a
+	 * batch of entries has been consumed.
+	 */
+	int head = r->consumer_head++;
+
+	/* Once we have processed enough entries invalidate them in
+	 * the ring all at once so producer can reuse their space in the ring.
+	 * We also do this when we reach end of the ring - not mandatory
+	 * but helps keep the implementation simple.
+	 */
+	if (unlikely(r->consumer_head - r->consumer_tail >= r->batch ||
+		     r->consumer_head >= r->size)) {
+		/* Zero out entries in the reverse order: this way we touch the
+		 * cache line that producer might currently be reading the last;
+		 * producer won't make progress and touch other cache lines
+		 * besides the first one until we write out all entries.
+		 */
+		while (likely(head >= r->consumer_tail))
+			r->queue[head--] = NULL;
+		r->consumer_tail = r->consumer_head;
+	}
+	if (unlikely(r->consumer_head >= r->size)) {
+		r->consumer_head = 0;
+		r->consumer_tail = 0;
+	}
+}
+
+static inline void *__ptr_ring_consume(struct ptr_ring *r)
+{
+	void *ptr;
+
+	ptr = __ptr_ring_peek(r);
+	if (ptr)
+		__ptr_ring_discard_one(r);
+
+	return ptr;
+}
+
+static inline int __ptr_ring_consume_batched(struct ptr_ring *r,
+					     void **array, int n)
+{
+	void *ptr;
+	int i;
+
+	for (i = 0; i < n; i++) {
+		ptr = __ptr_ring_consume(r);
+		if (!ptr)
+			break;
+		array[i] = ptr;
+	}
+
+	return i;
+}
+
+/*
+ * Note: resize (below) nests producer lock within consumer lock, so if you
+ * call this in interrupt or BH context, you must disable interrupts/BH when
+ * producing.
+ */
+static inline void *ptr_ring_consume(struct ptr_ring *r)
+{
+	void *ptr;
+
+	spin_lock(&r->consumer_lock);
+	ptr = __ptr_ring_consume(r);
+	spin_unlock(&r->consumer_lock);
+
+	return ptr;
+}
+
+static inline void *ptr_ring_consume_irq(struct ptr_ring *r)
+{
+	void *ptr;
+
+	spin_lock_irq(&r->consumer_lock);
+	ptr = __ptr_ring_consume(r);
+	spin_unlock_irq(&r->consumer_lock);
+
+	return ptr;
+}
+
+static inline void *ptr_ring_consume_any(struct ptr_ring *r)
+{
+	unsigned long flags;
+	void *ptr;
+
+	spin_lock_irqsave(&r->consumer_lock, flags);
+	ptr = __ptr_ring_consume(r);
+	spin_unlock_irqrestore(&r->consumer_lock, flags);
+
+	return ptr;
+}
+
+static inline void *ptr_ring_consume_bh(struct ptr_ring *r)
+{
+	void *ptr;
+
+	spin_lock_bh(&r->consumer_lock);
+	ptr = __ptr_ring_consume(r);
+	spin_unlock_bh(&r->consumer_lock);
+
+	return ptr;
+}
+
+static inline int ptr_ring_consume_batched(struct ptr_ring *r,
+					   void **array, int n)
+{
+	int ret;
+
+	spin_lock(&r->consumer_lock);
+	ret = __ptr_ring_consume_batched(r, array, n);
+	spin_unlock(&r->consumer_lock);
+
+	return ret;
+}
+
+static inline int ptr_ring_consume_batched_irq(struct ptr_ring *r,
+					       void **array, int n)
+{
+	int ret;
+
+	spin_lock_irq(&r->consumer_lock);
+	ret = __ptr_ring_consume_batched(r, array, n);
+	spin_unlock_irq(&r->consumer_lock);
+
+	return ret;
+}
+
+static inline int ptr_ring_consume_batched_any(struct ptr_ring *r,
+					       void **array, int n)
+{
+	unsigned long flags;
+	int ret;
+
+	spin_lock_irqsave(&r->consumer_lock, flags);
+	ret = __ptr_ring_consume_batched(r, array, n);
+	spin_unlock_irqrestore(&r->consumer_lock, flags);
+
+	return ret;
+}
+
+static inline int ptr_ring_consume_batched_bh(struct ptr_ring *r,
+					      void **array, int n)
+{
+	int ret;
+
+	spin_lock_bh(&r->consumer_lock);
+	ret = __ptr_ring_consume_batched(r, array, n);
+	spin_unlock_bh(&r->consumer_lock);
+
+	return ret;
+}
+
+/* Cast to structure type and call a function without discarding from FIFO.
+ * Function must return a value.
+ * Callers must take consumer_lock.
+ */
+#define __PTR_RING_PEEK_CALL(r, f) ((f)(__ptr_ring_peek(r)))
+
+#define PTR_RING_PEEK_CALL(r, f) ({ \
+	typeof((f)(NULL)) __PTR_RING_PEEK_CALL_v; \
+	\
+	spin_lock(&(r)->consumer_lock); \
+	__PTR_RING_PEEK_CALL_v = __PTR_RING_PEEK_CALL(r, f); \
+	spin_unlock(&(r)->consumer_lock); \
+	__PTR_RING_PEEK_CALL_v; \
+})
+
+#define PTR_RING_PEEK_CALL_IRQ(r, f) ({ \
+	typeof((f)(NULL)) __PTR_RING_PEEK_CALL_v; \
+	\
+	spin_lock_irq(&(r)->consumer_lock); \
+	__PTR_RING_PEEK_CALL_v = __PTR_RING_PEEK_CALL(r, f); \
+	spin_unlock_irq(&(r)->consumer_lock); \
+	__PTR_RING_PEEK_CALL_v; \
+})
+
+#define PTR_RING_PEEK_CALL_BH(r, f) ({ \
+	typeof((f)(NULL)) __PTR_RING_PEEK_CALL_v; \
+	\
+	spin_lock_bh(&(r)->consumer_lock); \
+	__PTR_RING_PEEK_CALL_v = __PTR_RING_PEEK_CALL(r, f); \
+	spin_unlock_bh(&(r)->consumer_lock); \
+	__PTR_RING_PEEK_CALL_v; \
+})
+
+#define PTR_RING_PEEK_CALL_ANY(r, f) ({ \
+	typeof((f)(NULL)) __PTR_RING_PEEK_CALL_v; \
+	unsigned long __PTR_RING_PEEK_CALL_f;\
+	\
+	spin_lock_irqsave(&(r)->consumer_lock, __PTR_RING_PEEK_CALL_f); \
+	__PTR_RING_PEEK_CALL_v = __PTR_RING_PEEK_CALL(r, f); \
+	spin_unlock_irqrestore(&(r)->consumer_lock, __PTR_RING_PEEK_CALL_f); \
+	__PTR_RING_PEEK_CALL_v; \
+})
+
+static inline void **__ptr_ring_init_queue_alloc(unsigned int size, gfp_t gfp)
+{
+	return kcalloc(size, sizeof(void *), gfp);
+}
+
+static inline void __ptr_ring_set_size(struct ptr_ring *r, int size)
+{
+	r->size = size;
+	r->batch = SMP_CACHE_BYTES * 2 / sizeof(*(r->queue));
+	/* We need to set batch at least to 1 to make logic
+	 * in __ptr_ring_discard_one work correctly.
+	 * Batching too much (because ring is small) would cause a lot of
+	 * burstiness. Needs tuning, for now disable batching.
+	 */
+	if (r->batch > r->size / 2 || !r->batch)
+		r->batch = 1;
+}
+
+static inline int ptr_ring_init(struct ptr_ring *r, int size, gfp_t gfp)
+{
+	r->queue = __ptr_ring_init_queue_alloc(size, gfp);
+	if (!r->queue)
+		return -ENOMEM;
+
+	__ptr_ring_set_size(r, size);
+	r->producer = r->consumer_head = r->consumer_tail = 0;
+	spin_lock_init(&r->producer_lock);
+	spin_lock_init(&r->consumer_lock);
+
+	return 0;
+}
+
+/*
+ * Return entries into ring. Destroy entries that don't fit.
+ *
+ * Note: this is expected to be a rare slow path operation.
+ *
+ * Note: producer lock is nested within consumer lock, so if you
+ * resize you must make sure all uses nest correctly.
+ * In particular if you consume ring in interrupt or BH context, you must
+ * disable interrupts/BH when doing so.
+ */
+static inline void ptr_ring_unconsume(struct ptr_ring *r, void **batch, int n,
+				      void (*destroy)(void *))
+{
+	unsigned long flags;
+	int head;
+
+	spin_lock_irqsave(&r->consumer_lock, flags);
+	spin_lock(&r->producer_lock);
+
+	if (!r->size)
+		goto done;
+
+	/*
+	 * Clean out buffered entries (for simplicity). This way following code
+	 * can test entries for NULL and if not assume they are valid.
+	 */
+	head = r->consumer_head - 1;
+	while (likely(head >= r->consumer_tail))
+		r->queue[head--] = NULL;
+	r->consumer_tail = r->consumer_head;
+
+	/*
+	 * Go over entries in batch, start moving head back and copy entries.
+	 * Stop when we run into previously unconsumed entries.
+	 */
+	while (n) {
+		head = r->consumer_head - 1;
+		if (head < 0)
+			head = r->size - 1;
+		if (r->queue[head]) {
+			/* This batch entry will have to be destroyed. */
+			goto done;
+		}
+		r->queue[head] = batch[--n];
+		r->consumer_tail = r->consumer_head = head;
+	}
+
+done:
+	/* Destroy all entries left in the batch. */
+	while (n)
+		destroy(batch[--n]);
+	spin_unlock(&r->producer_lock);
+	spin_unlock_irqrestore(&r->consumer_lock, flags);
+}
+
+static inline void **__ptr_ring_swap_queue(struct ptr_ring *r, void **queue,
+					   int size, gfp_t gfp,
+					   void (*destroy)(void *))
+{
+	int producer = 0;
+	void **old;
+	void *ptr;
+
+	while ((ptr = __ptr_ring_consume(r)))
+		if (producer < size)
+			queue[producer++] = ptr;
+		else if (destroy)
+			destroy(ptr);
+
+	__ptr_ring_set_size(r, size);
+	r->producer = producer;
+	r->consumer_head = 0;
+	r->consumer_tail = 0;
+	old = r->queue;
+	r->queue = queue;
+
+	return old;
+}
+
+/*
+ * Note: producer lock is nested within consumer lock, so if you
+ * resize you must make sure all uses nest correctly.
+ * In particular if you consume ring in interrupt or BH context, you must
+ * disable interrupts/BH when doing so.
+ */
+static inline int ptr_ring_resize(struct ptr_ring *r, int size, gfp_t gfp,
+				  void (*destroy)(void *))
+{
+	unsigned long flags;
+	void **queue = __ptr_ring_init_queue_alloc(size, gfp);
+	void **old;
+
+	if (!queue)
+		return -ENOMEM;
+
+	spin_lock_irqsave(&(r)->consumer_lock, flags);
+	spin_lock(&(r)->producer_lock);
+
+	old = __ptr_ring_swap_queue(r, queue, size, gfp, destroy);
+
+	spin_unlock(&(r)->producer_lock);
+	spin_unlock_irqrestore(&(r)->consumer_lock, flags);
+
+	kfree(old);
+
+	return 0;
+}
+
+/*
+ * Note: producer lock is nested within consumer lock, so if you
+ * resize you must make sure all uses nest correctly.
+ * In particular if you consume ring in interrupt or BH context, you must
+ * disable interrupts/BH when doing so.
+ */
+static inline int ptr_ring_resize_multiple(struct ptr_ring **rings,
+					   unsigned int nrings,
+					   int size,
+					   gfp_t gfp, void (*destroy)(void *))
+{
+	unsigned long flags;
+	void ***queues;
+	int i;
+
+	queues = kmalloc_array(nrings, sizeof(*queues), gfp);
+	if (!queues)
+		goto noqueues;
+
+	for (i = 0; i < nrings; ++i) {
+		queues[i] = __ptr_ring_init_queue_alloc(size, gfp);
+		if (!queues[i])
+			goto nomem;
+	}
+
+	for (i = 0; i < nrings; ++i) {
+		spin_lock_irqsave(&(rings[i])->consumer_lock, flags);
+		spin_lock(&(rings[i])->producer_lock);
+		queues[i] = __ptr_ring_swap_queue(rings[i], queues[i],
+						  size, gfp, destroy);
+		spin_unlock(&(rings[i])->producer_lock);
+		spin_unlock_irqrestore(&(rings[i])->consumer_lock, flags);
+	}
+
+	for (i = 0; i < nrings; ++i)
+		kfree(queues[i]);
+
+	kfree(queues);
+
+	return 0;
+
+nomem:
+	while (--i >= 0)
+		kfree(queues[i]);
+
+	kfree(queues);
+
+noqueues:
+	return -ENOMEM;
+}
+
+static inline void ptr_ring_cleanup(struct ptr_ring *r, void (*destroy)(void *))
+{
+	void *ptr;
+
+	if (destroy)
+		while ((ptr = ptr_ring_consume(r)))
+			destroy(ptr);
+	kfree(r->queue);
+}
+
+#endif /* _LINUX_PTR_RING_H  */
diff --git b/net/wireguard/compat/simd/include/asm/simd.h b/net/wireguard/compat/simd/include/asm/simd.h
new file mode 100644
index 0000000..f3f9117
--- /dev/null
+++ b/net/wireguard/compat/simd/include/asm/simd.h
@@ -0,0 +1 @@
+#include <asm/i387.h>
diff --git b/net/wireguard/compat/siphash/include/linux/siphash.h b/net/wireguard/compat/siphash/include/linux/siphash.h
new file mode 100644
index 0000000..43aeaa0
--- /dev/null
+++ b/net/wireguard/compat/siphash/include/linux/siphash.h
@@ -0,0 +1,140 @@
+/* Copyright (C) 2016 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.
+ *
+ * SipHash: a fast short-input PRF
+ * https://131002.net/siphash/
+ *
+ * This implementation is specifically for SipHash2-4 for a secure PRF
+ * and HalfSipHash1-3/SipHash1-3 for an insecure PRF only suitable for
+ * hashtables.
+ */
+
+#ifndef _WG_LINUX_SIPHASH_H
+#define _WG_LINUX_SIPHASH_H
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+
+#define SIPHASH_ALIGNMENT __alignof__(u64)
+typedef struct {
+	u64 key[2];
+} siphash_key_t;
+
+u64 __siphash_aligned(const void *data, size_t len, const siphash_key_t *key);
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+u64 __siphash_unaligned(const void *data, size_t len, const siphash_key_t *key);
+#endif
+
+u64 siphash_1u64(const u64 a, const siphash_key_t *key);
+u64 siphash_2u64(const u64 a, const u64 b, const siphash_key_t *key);
+u64 siphash_3u64(const u64 a, const u64 b, const u64 c,
+		 const siphash_key_t *key);
+u64 siphash_4u64(const u64 a, const u64 b, const u64 c, const u64 d,
+		 const siphash_key_t *key);
+u64 siphash_1u32(const u32 a, const siphash_key_t *key);
+u64 siphash_3u32(const u32 a, const u32 b, const u32 c,
+		 const siphash_key_t *key);
+
+static inline u64 siphash_2u32(const u32 a, const u32 b,
+			       const siphash_key_t *key)
+{
+	return siphash_1u64((u64)b << 32 | a, key);
+}
+static inline u64 siphash_4u32(const u32 a, const u32 b, const u32 c,
+			       const u32 d, const siphash_key_t *key)
+{
+	return siphash_2u64((u64)b << 32 | a, (u64)d << 32 | c, key);
+}
+
+
+static inline u64 ___siphash_aligned(const __le64 *data, size_t len,
+				     const siphash_key_t *key)
+{
+	if (__builtin_constant_p(len) && len == 4)
+		return siphash_1u32(le32_to_cpup((const __le32 *)data), key);
+	if (__builtin_constant_p(len) && len == 8)
+		return siphash_1u64(le64_to_cpu(data[0]), key);
+	if (__builtin_constant_p(len) && len == 16)
+		return siphash_2u64(le64_to_cpu(data[0]), le64_to_cpu(data[1]),
+				    key);
+	if (__builtin_constant_p(len) && len == 24)
+		return siphash_3u64(le64_to_cpu(data[0]), le64_to_cpu(data[1]),
+				    le64_to_cpu(data[2]), key);
+	if (__builtin_constant_p(len) && len == 32)
+		return siphash_4u64(le64_to_cpu(data[0]), le64_to_cpu(data[1]),
+				    le64_to_cpu(data[2]), le64_to_cpu(data[3]),
+				    key);
+	return __siphash_aligned(data, len, key);
+}
+
+/**
+ * siphash - compute 64-bit siphash PRF value
+ * @data: buffer to hash
+ * @size: size of @data
+ * @key: the siphash key
+ */
+static inline u64 siphash(const void *data, size_t len,
+			  const siphash_key_t *key)
+{
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+	if (!IS_ALIGNED((unsigned long)data, SIPHASH_ALIGNMENT))
+		return __siphash_unaligned(data, len, key);
+#endif
+	return ___siphash_aligned(data, len, key);
+}
+
+#define HSIPHASH_ALIGNMENT __alignof__(unsigned long)
+typedef struct {
+	unsigned long key[2];
+} hsiphash_key_t;
+
+u32 __hsiphash_aligned(const void *data, size_t len,
+		       const hsiphash_key_t *key);
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+u32 __hsiphash_unaligned(const void *data, size_t len,
+			 const hsiphash_key_t *key);
+#endif
+
+u32 hsiphash_1u32(const u32 a, const hsiphash_key_t *key);
+u32 hsiphash_2u32(const u32 a, const u32 b, const hsiphash_key_t *key);
+u32 hsiphash_3u32(const u32 a, const u32 b, const u32 c,
+		  const hsiphash_key_t *key);
+u32 hsiphash_4u32(const u32 a, const u32 b, const u32 c, const u32 d,
+		  const hsiphash_key_t *key);
+
+static inline u32 ___hsiphash_aligned(const __le32 *data, size_t len,
+				      const hsiphash_key_t *key)
+{
+	if (__builtin_constant_p(len) && len == 4)
+		return hsiphash_1u32(le32_to_cpu(data[0]), key);
+	if (__builtin_constant_p(len) && len == 8)
+		return hsiphash_2u32(le32_to_cpu(data[0]), le32_to_cpu(data[1]),
+				     key);
+	if (__builtin_constant_p(len) && len == 12)
+		return hsiphash_3u32(le32_to_cpu(data[0]), le32_to_cpu(data[1]),
+				     le32_to_cpu(data[2]), key);
+	if (__builtin_constant_p(len) && len == 16)
+		return hsiphash_4u32(le32_to_cpu(data[0]), le32_to_cpu(data[1]),
+				     le32_to_cpu(data[2]), le32_to_cpu(data[3]),
+				     key);
+	return __hsiphash_aligned(data, len, key);
+}
+
+/**
+ * hsiphash - compute 32-bit hsiphash PRF value
+ * @data: buffer to hash
+ * @size: size of @data
+ * @key: the hsiphash key
+ */
+static inline u32 hsiphash(const void *data, size_t len,
+			   const hsiphash_key_t *key)
+{
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+	if (!IS_ALIGNED((unsigned long)data, HSIPHASH_ALIGNMENT))
+		return __hsiphash_unaligned(data, len, key);
+#endif
+	return ___hsiphash_aligned(data, len, key);
+}
+
+#endif /* _WG_LINUX_SIPHASH_H */
diff --git b/net/wireguard/compat/siphash/siphash.c b/net/wireguard/compat/siphash/siphash.c
new file mode 100644
index 0000000..0d4309b
--- /dev/null
+++ b/net/wireguard/compat/siphash/siphash.c
@@ -0,0 +1,539 @@
+/* Copyright (C) 2016 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.
+ *
+ * SipHash: a fast short-input PRF
+ * https://131002.net/siphash/
+ *
+ * This implementation is specifically for SipHash2-4 for a secure PRF
+ * and HalfSipHash1-3/SipHash1-3 for an insecure PRF only suitable for
+ * hashtables.
+ */
+
+#include <linux/siphash.h>
+#include <asm/unaligned.h>
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0)
+#ifdef __LITTLE_ENDIAN
+#define bytemask_from_count(cnt)	(~(~0ul << (cnt)*8))
+#else
+#define bytemask_from_count(cnt)	(~(~0ul >> (cnt)*8))
+#endif
+#endif
+
+#if defined(CONFIG_DCACHE_WORD_ACCESS) && BITS_PER_LONG == 64
+#include <linux/dcache.h>
+#include <asm/word-at-a-time.h>
+#endif
+
+#define SIPROUND \
+	do { \
+	v0 += v1; v1 = rol64(v1, 13); v1 ^= v0; v0 = rol64(v0, 32); \
+	v2 += v3; v3 = rol64(v3, 16); v3 ^= v2; \
+	v0 += v3; v3 = rol64(v3, 21); v3 ^= v0; \
+	v2 += v1; v1 = rol64(v1, 17); v1 ^= v2; v2 = rol64(v2, 32); \
+	} while (0)
+
+#define PREAMBLE(len) \
+	u64 v0 = 0x736f6d6570736575ULL; \
+	u64 v1 = 0x646f72616e646f6dULL; \
+	u64 v2 = 0x6c7967656e657261ULL; \
+	u64 v3 = 0x7465646279746573ULL; \
+	u64 b = ((u64)(len)) << 56; \
+	v3 ^= key->key[1]; \
+	v2 ^= key->key[0]; \
+	v1 ^= key->key[1]; \
+	v0 ^= key->key[0];
+
+#define POSTAMBLE \
+	v3 ^= b; \
+	SIPROUND; \
+	SIPROUND; \
+	v0 ^= b; \
+	v2 ^= 0xff; \
+	SIPROUND; \
+	SIPROUND; \
+	SIPROUND; \
+	SIPROUND; \
+	return (v0 ^ v1) ^ (v2 ^ v3);
+
+u64 __siphash_aligned(const void *data, size_t len, const siphash_key_t *key)
+{
+	const u8 *end = data + len - (len % sizeof(u64));
+	const u8 left = len & (sizeof(u64) - 1);
+	u64 m;
+	PREAMBLE(len)
+	for (; data != end; data += sizeof(u64)) {
+		m = le64_to_cpup(data);
+		v3 ^= m;
+		SIPROUND;
+		SIPROUND;
+		v0 ^= m;
+	}
+#if defined(CONFIG_DCACHE_WORD_ACCESS) && BITS_PER_LONG == 64
+	if (left)
+		b |= le64_to_cpu((__force __le64)(load_unaligned_zeropad(data) &
+						  bytemask_from_count(left)));
+#else
+	switch (left) {
+	case 7: b |= ((u64)end[6]) << 48;
+	case 6: b |= ((u64)end[5]) << 40;
+	case 5: b |= ((u64)end[4]) << 32;
+	case 4: b |= le32_to_cpup(data); break;
+	case 3: b |= ((u64)end[2]) << 16;
+	case 2: b |= le16_to_cpup(data); break;
+	case 1: b |= end[0];
+	}
+#endif
+	POSTAMBLE
+}
+
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+u64 __siphash_unaligned(const void *data, size_t len, const siphash_key_t *key)
+{
+	const u8 *end = data + len - (len % sizeof(u64));
+	const u8 left = len & (sizeof(u64) - 1);
+	u64 m;
+	PREAMBLE(len)
+	for (; data != end; data += sizeof(u64)) {
+		m = get_unaligned_le64(data);
+		v3 ^= m;
+		SIPROUND;
+		SIPROUND;
+		v0 ^= m;
+	}
+#if defined(CONFIG_DCACHE_WORD_ACCESS) && BITS_PER_LONG == 64
+	if (left)
+		b |= le64_to_cpu((__force __le64)(load_unaligned_zeropad(data) &
+						  bytemask_from_count(left)));
+#else
+	switch (left) {
+	case 7: b |= ((u64)end[6]) << 48;
+	case 6: b |= ((u64)end[5]) << 40;
+	case 5: b |= ((u64)end[4]) << 32;
+	case 4: b |= get_unaligned_le32(end); break;
+	case 3: b |= ((u64)end[2]) << 16;
+	case 2: b |= get_unaligned_le16(end); break;
+	case 1: b |= end[0];
+	}
+#endif
+	POSTAMBLE
+}
+#endif
+
+/**
+ * siphash_1u64 - compute 64-bit siphash PRF value of a u64
+ * @first: first u64
+ * @key: the siphash key
+ */
+u64 siphash_1u64(const u64 first, const siphash_key_t *key)
+{
+	PREAMBLE(8)
+	v3 ^= first;
+	SIPROUND;
+	SIPROUND;
+	v0 ^= first;
+	POSTAMBLE
+}
+
+/**
+ * siphash_2u64 - compute 64-bit siphash PRF value of 2 u64
+ * @first: first u64
+ * @second: second u64
+ * @key: the siphash key
+ */
+u64 siphash_2u64(const u64 first, const u64 second, const siphash_key_t *key)
+{
+	PREAMBLE(16)
+	v3 ^= first;
+	SIPROUND;
+	SIPROUND;
+	v0 ^= first;
+	v3 ^= second;
+	SIPROUND;
+	SIPROUND;
+	v0 ^= second;
+	POSTAMBLE
+}
+
+/**
+ * siphash_3u64 - compute 64-bit siphash PRF value of 3 u64
+ * @first: first u64
+ * @second: second u64
+ * @third: third u64
+ * @key: the siphash key
+ */
+u64 siphash_3u64(const u64 first, const u64 second, const u64 third,
+		 const siphash_key_t *key)
+{
+	PREAMBLE(24)
+	v3 ^= first;
+	SIPROUND;
+	SIPROUND;
+	v0 ^= first;
+	v3 ^= second;
+	SIPROUND;
+	SIPROUND;
+	v0 ^= second;
+	v3 ^= third;
+	SIPROUND;
+	SIPROUND;
+	v0 ^= third;
+	POSTAMBLE
+}
+
+/**
+ * siphash_4u64 - compute 64-bit siphash PRF value of 4 u64
+ * @first: first u64
+ * @second: second u64
+ * @third: third u64
+ * @forth: forth u64
+ * @key: the siphash key
+ */
+u64 siphash_4u64(const u64 first, const u64 second, const u64 third,
+		 const u64 forth, const siphash_key_t *key)
+{
+	PREAMBLE(32)
+	v3 ^= first;
+	SIPROUND;
+	SIPROUND;
+	v0 ^= first;
+	v3 ^= second;
+	SIPROUND;
+	SIPROUND;
+	v0 ^= second;
+	v3 ^= third;
+	SIPROUND;
+	SIPROUND;
+	v0 ^= third;
+	v3 ^= forth;
+	SIPROUND;
+	SIPROUND;
+	v0 ^= forth;
+	POSTAMBLE
+}
+
+u64 siphash_1u32(const u32 first, const siphash_key_t *key)
+{
+	PREAMBLE(4)
+	b |= first;
+	POSTAMBLE
+}
+
+u64 siphash_3u32(const u32 first, const u32 second, const u32 third,
+		 const siphash_key_t *key)
+{
+	u64 combined = (u64)second << 32 | first;
+	PREAMBLE(12)
+	v3 ^= combined;
+	SIPROUND;
+	SIPROUND;
+	v0 ^= combined;
+	b |= third;
+	POSTAMBLE
+}
+
+#if BITS_PER_LONG == 64
+/* Note that on 64-bit, we make HalfSipHash1-3 actually be SipHash1-3, for
+ * performance reasons. On 32-bit, below, we actually implement HalfSipHash1-3.
+ */
+
+#define HSIPROUND SIPROUND
+#define HPREAMBLE(len) PREAMBLE(len)
+#define HPOSTAMBLE \
+	v3 ^= b; \
+	HSIPROUND; \
+	v0 ^= b; \
+	v2 ^= 0xff; \
+	HSIPROUND; \
+	HSIPROUND; \
+	HSIPROUND; \
+	return (v0 ^ v1) ^ (v2 ^ v3);
+
+u32 __hsiphash_aligned(const void *data, size_t len, const hsiphash_key_t *key)
+{
+	const u8 *end = data + len - (len % sizeof(u64));
+	const u8 left = len & (sizeof(u64) - 1);
+	u64 m;
+	HPREAMBLE(len)
+	for (; data != end; data += sizeof(u64)) {
+		m = le64_to_cpup(data);
+		v3 ^= m;
+		HSIPROUND;
+		v0 ^= m;
+	}
+#if defined(CONFIG_DCACHE_WORD_ACCESS) && BITS_PER_LONG == 64
+	if (left)
+		b |= le64_to_cpu((__force __le64)(load_unaligned_zeropad(data) &
+						  bytemask_from_count(left)));
+#else
+	switch (left) {
+	case 7: b |= ((u64)end[6]) << 48;
+	case 6: b |= ((u64)end[5]) << 40;
+	case 5: b |= ((u64)end[4]) << 32;
+	case 4: b |= le32_to_cpup(data); break;
+	case 3: b |= ((u64)end[2]) << 16;
+	case 2: b |= le16_to_cpup(data); break;
+	case 1: b |= end[0];
+	}
+#endif
+	HPOSTAMBLE
+}
+
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+u32 __hsiphash_unaligned(const void *data, size_t len,
+			 const hsiphash_key_t *key)
+{
+	const u8 *end = data + len - (len % sizeof(u64));
+	const u8 left = len & (sizeof(u64) - 1);
+	u64 m;
+	HPREAMBLE(len)
+	for (; data != end; data += sizeof(u64)) {
+		m = get_unaligned_le64(data);
+		v3 ^= m;
+		HSIPROUND;
+		v0 ^= m;
+	}
+#if defined(CONFIG_DCACHE_WORD_ACCESS) && BITS_PER_LONG == 64
+	if (left)
+		b |= le64_to_cpu((__force __le64)(load_unaligned_zeropad(data) &
+						  bytemask_from_count(left)));
+#else
+	switch (left) {
+	case 7: b |= ((u64)end[6]) << 48;
+	case 6: b |= ((u64)end[5]) << 40;
+	case 5: b |= ((u64)end[4]) << 32;
+	case 4: b |= get_unaligned_le32(end); break;
+	case 3: b |= ((u64)end[2]) << 16;
+	case 2: b |= get_unaligned_le16(end); break;
+	case 1: b |= end[0];
+	}
+#endif
+	HPOSTAMBLE
+}
+#endif
+
+/**
+ * hsiphash_1u32 - compute 64-bit hsiphash PRF value of a u32
+ * @first: first u32
+ * @key: the hsiphash key
+ */
+u32 hsiphash_1u32(const u32 first, const hsiphash_key_t *key)
+{
+	HPREAMBLE(4)
+	b |= first;
+	HPOSTAMBLE
+}
+
+/**
+ * hsiphash_2u32 - compute 32-bit hsiphash PRF value of 2 u32
+ * @first: first u32
+ * @second: second u32
+ * @key: the hsiphash key
+ */
+u32 hsiphash_2u32(const u32 first, const u32 second, const hsiphash_key_t *key)
+{
+	u64 combined = (u64)second << 32 | first;
+	HPREAMBLE(8)
+	v3 ^= combined;
+	HSIPROUND;
+	v0 ^= combined;
+	HPOSTAMBLE
+}
+
+/**
+ * hsiphash_3u32 - compute 32-bit hsiphash PRF value of 3 u32
+ * @first: first u32
+ * @second: second u32
+ * @third: third u32
+ * @key: the hsiphash key
+ */
+u32 hsiphash_3u32(const u32 first, const u32 second, const u32 third,
+		  const hsiphash_key_t *key)
+{
+	u64 combined = (u64)second << 32 | first;
+	HPREAMBLE(12)
+	v3 ^= combined;
+	HSIPROUND;
+	v0 ^= combined;
+	b |= third;
+	HPOSTAMBLE
+}
+
+/**
+ * hsiphash_4u32 - compute 32-bit hsiphash PRF value of 4 u32
+ * @first: first u32
+ * @second: second u32
+ * @third: third u32
+ * @forth: forth u32
+ * @key: the hsiphash key
+ */
+u32 hsiphash_4u32(const u32 first, const u32 second, const u32 third,
+		  const u32 forth, const hsiphash_key_t *key)
+{
+	u64 combined = (u64)second << 32 | first;
+	HPREAMBLE(16)
+	v3 ^= combined;
+	HSIPROUND;
+	v0 ^= combined;
+	combined = (u64)forth << 32 | third;
+	v3 ^= combined;
+	HSIPROUND;
+	v0 ^= combined;
+	HPOSTAMBLE
+}
+#else
+#define HSIPROUND \
+	do { \
+	v0 += v1; v1 = rol32(v1, 5); v1 ^= v0; v0 = rol32(v0, 16); \
+	v2 += v3; v3 = rol32(v3, 8); v3 ^= v2; \
+	v0 += v3; v3 = rol32(v3, 7); v3 ^= v0; \
+	v2 += v1; v1 = rol32(v1, 13); v1 ^= v2; v2 = rol32(v2, 16); \
+	} while (0)
+
+#define HPREAMBLE(len) \
+	u32 v0 = 0; \
+	u32 v1 = 0; \
+	u32 v2 = 0x6c796765U; \
+	u32 v3 = 0x74656462U; \
+	u32 b = ((u32)(len)) << 24; \
+	v3 ^= key->key[1]; \
+	v2 ^= key->key[0]; \
+	v1 ^= key->key[1]; \
+	v0 ^= key->key[0];
+
+#define HPOSTAMBLE \
+	v3 ^= b; \
+	HSIPROUND; \
+	v0 ^= b; \
+	v2 ^= 0xff; \
+	HSIPROUND; \
+	HSIPROUND; \
+	HSIPROUND; \
+	return v1 ^ v3;
+
+u32 __hsiphash_aligned(const void *data, size_t len, const hsiphash_key_t *key)
+{
+	const u8 *end = data + len - (len % sizeof(u32));
+	const u8 left = len & (sizeof(u32) - 1);
+	u32 m;
+	HPREAMBLE(len)
+	for (; data != end; data += sizeof(u32)) {
+		m = le32_to_cpup(data);
+		v3 ^= m;
+		HSIPROUND;
+		v0 ^= m;
+	}
+	switch (left) {
+	case 3: b |= ((u32)end[2]) << 16;
+	case 2: b |= le16_to_cpup(data); break;
+	case 1: b |= end[0];
+	}
+	HPOSTAMBLE
+}
+
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+u32 __hsiphash_unaligned(const void *data, size_t len,
+			 const hsiphash_key_t *key)
+{
+	const u8 *end = data + len - (len % sizeof(u32));
+	const u8 left = len & (sizeof(u32) - 1);
+	u32 m;
+	HPREAMBLE(len)
+	for (; data != end; data += sizeof(u32)) {
+		m = get_unaligned_le32(data);
+		v3 ^= m;
+		HSIPROUND;
+		v0 ^= m;
+	}
+	switch (left) {
+	case 3: b |= ((u32)end[2]) << 16;
+	case 2: b |= get_unaligned_le16(end); break;
+	case 1: b |= end[0];
+	}
+	HPOSTAMBLE
+}
+#endif
+
+/**
+ * hsiphash_1u32 - compute 32-bit hsiphash PRF value of a u32
+ * @first: first u32
+ * @key: the hsiphash key
+ */
+u32 hsiphash_1u32(const u32 first, const hsiphash_key_t *key)
+{
+	HPREAMBLE(4)
+	v3 ^= first;
+	HSIPROUND;
+	v0 ^= first;
+	HPOSTAMBLE
+}
+
+/**
+ * hsiphash_2u32 - compute 32-bit hsiphash PRF value of 2 u32
+ * @first: first u32
+ * @second: second u32
+ * @key: the hsiphash key
+ */
+u32 hsiphash_2u32(const u32 first, const u32 second, const hsiphash_key_t *key)
+{
+	HPREAMBLE(8)
+	v3 ^= first;
+	HSIPROUND;
+	v0 ^= first;
+	v3 ^= second;
+	HSIPROUND;
+	v0 ^= second;
+	HPOSTAMBLE
+}
+
+/**
+ * hsiphash_3u32 - compute 32-bit hsiphash PRF value of 3 u32
+ * @first: first u32
+ * @second: second u32
+ * @third: third u32
+ * @key: the hsiphash key
+ */
+u32 hsiphash_3u32(const u32 first, const u32 second, const u32 third,
+		  const hsiphash_key_t *key)
+{
+	HPREAMBLE(12)
+	v3 ^= first;
+	HSIPROUND;
+	v0 ^= first;
+	v3 ^= second;
+	HSIPROUND;
+	v0 ^= second;
+	v3 ^= third;
+	HSIPROUND;
+	v0 ^= third;
+	HPOSTAMBLE
+}
+
+/**
+ * hsiphash_4u32 - compute 32-bit hsiphash PRF value of 4 u32
+ * @first: first u32
+ * @second: second u32
+ * @third: third u32
+ * @forth: forth u32
+ * @key: the hsiphash key
+ */
+u32 hsiphash_4u32(const u32 first, const u32 second, const u32 third,
+		  const u32 forth, const hsiphash_key_t *key)
+{
+	HPREAMBLE(16)
+	v3 ^= first;
+	HSIPROUND;
+	v0 ^= first;
+	v3 ^= second;
+	HSIPROUND;
+	v0 ^= second;
+	v3 ^= third;
+	HSIPROUND;
+	v0 ^= third;
+	v3 ^= forth;
+	HSIPROUND;
+	v0 ^= forth;
+	HPOSTAMBLE
+}
+#endif
diff --git b/net/wireguard/compat/udp_tunnel/include/net/udp_tunnel.h b/net/wireguard/compat/udp_tunnel/include/net/udp_tunnel.h
new file mode 100644
index 0000000..8999527
--- /dev/null
+++ b/net/wireguard/compat/udp_tunnel/include/net/udp_tunnel.h
@@ -0,0 +1,94 @@
+#ifndef _WG_NET_UDP_TUNNEL_H
+#define _WG_NET_UDP_TUNNEL_H
+
+#include <net/ip_tunnels.h>
+#include <net/udp.h>
+
+#if IS_ENABLED(CONFIG_IPV6)
+#include <net/ipv6.h>
+#include <net/addrconf.h>
+#endif
+
+struct udp_port_cfg {
+	u8			family;
+
+	/* Used only for kernel-created sockets */
+	union {
+		struct in_addr		local_ip;
+#if IS_ENABLED(CONFIG_IPV6)
+		struct in6_addr		local_ip6;
+#endif
+	};
+
+	union {
+		struct in_addr		peer_ip;
+#if IS_ENABLED(CONFIG_IPV6)
+		struct in6_addr		peer_ip6;
+#endif
+	};
+
+	__be16			local_udp_port;
+	__be16			peer_udp_port;
+	unsigned int		use_udp_checksums:1,
+				use_udp6_tx_checksums:1,
+				use_udp6_rx_checksums:1,
+				ipv6_v6only:1;
+};
+
+int udp_sock_create4(struct net *net, struct udp_port_cfg *cfg,
+		     struct socket **sockp);
+
+#if IS_ENABLED(CONFIG_IPV6)
+int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg,
+		     struct socket **sockp);
+#else
+static inline int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg,
+				   struct socket **sockp)
+{
+	return 0;
+}
+#endif
+
+static inline int udp_sock_create(struct net *net,
+				  struct udp_port_cfg *cfg,
+				  struct socket **sockp)
+{
+	if (cfg->family == AF_INET)
+		return udp_sock_create4(net, cfg, sockp);
+
+	if (cfg->family == AF_INET6)
+		return udp_sock_create6(net, cfg, sockp);
+
+	return -EPFNOSUPPORT;
+}
+
+typedef int (*udp_tunnel_encap_rcv_t)(struct sock *sk, struct sk_buff *skb);
+
+struct udp_tunnel_sock_cfg {
+	void *sk_user_data;
+	__u8  encap_type;
+	udp_tunnel_encap_rcv_t encap_rcv;
+};
+
+/* Setup the given (UDP) sock to receive UDP encapsulated packets */
+void setup_udp_tunnel_sock(struct net *net, struct socket *sock,
+			   struct udp_tunnel_sock_cfg *sock_cfg);
+
+/* Transmit the skb using UDP encapsulation. */
+void udp_tunnel_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb,
+			 __be32 src, __be32 dst, __u8 tos, __u8 ttl,
+			 __be16 df, __be16 src_port, __be16 dst_port,
+			 bool xnet, bool nocheck);
+
+#if IS_ENABLED(CONFIG_IPV6)
+int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk,
+			 struct sk_buff *skb,
+			 struct net_device *dev, struct in6_addr *saddr,
+			 struct in6_addr *daddr,
+			 __u8 prio, __u8 ttl, __be32 label,
+			 __be16 src_port, __be16 dst_port, bool nocheck);
+#endif
+
+void udp_tunnel_sock_release(struct socket *sock);
+
+#endif /* _WG_NET_UDP_TUNNEL_H */
diff --git b/net/wireguard/compat/udp_tunnel/udp_tunnel.c b/net/wireguard/compat/udp_tunnel/udp_tunnel.c
new file mode 100644
index 0000000..6afb11d
--- /dev/null
+++ b/net/wireguard/compat/udp_tunnel/udp_tunnel.c
@@ -0,0 +1,385 @@
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/socket.h>
+#include <linux/udp.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <net/net_namespace.h>
+#include <net/inet_common.h>
+#include <net/udp.h>
+#include <net/udp_tunnel.h>
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0)
+#define __sk_user_data(sk) ((*((void __rcu **)&(sk)->sk_user_data)))
+#define rcu_dereference_sk_user_data(sk) rcu_dereference(__sk_user_data((sk)))
+#define rcu_assign_sk_user_data(sk, ptr) rcu_assign_pointer(__sk_user_data((sk)), ptr)
+#endif
+
+/* This is global so, uh, only one real call site... This is the kind of horrific hack you'd expect to see in compat code. */
+static udp_tunnel_encap_rcv_t encap_rcv = NULL;
+static void our_sk_data_ready(struct sock *sk
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 15, 0)
+			      ,int unused_vulnerable_length_param
+#endif
+			      )
+{
+	struct sk_buff *skb;
+	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
+		skb_orphan(skb);
+		sk_mem_reclaim(sk);
+		encap_rcv(sk, skb);
+	}
+}
+
+int udp_sock_create4(struct net *net, struct udp_port_cfg *cfg,
+		     struct socket **sockp)
+{
+	int err;
+	struct socket *sock = NULL;
+	struct sockaddr_in udp_addr;
+
+	err = __sock_create(net, AF_INET, SOCK_DGRAM, 0, &sock, 1);
+	if (err < 0)
+		goto error;
+
+	udp_addr.sin_family = AF_INET;
+	udp_addr.sin_addr = cfg->local_ip;
+	udp_addr.sin_port = cfg->local_udp_port;
+	err = kernel_bind(sock, (struct sockaddr *)&udp_addr,
+			  sizeof(udp_addr));
+	if (err < 0)
+		goto error;
+
+	if (cfg->peer_udp_port) {
+		udp_addr.sin_family = AF_INET;
+		udp_addr.sin_addr = cfg->peer_ip;
+		udp_addr.sin_port = cfg->peer_udp_port;
+		err = kernel_connect(sock, (struct sockaddr *)&udp_addr,
+				     sizeof(udp_addr), 0);
+		if (err < 0)
+			goto error;
+	}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 16, 0)
+	sock->sk->sk_no_check = !cfg->use_udp_checksums;
+#else
+	sock->sk->sk_no_check_tx = !cfg->use_udp_checksums;
+#endif
+
+	*sockp = sock;
+	return 0;
+
+error:
+	if (sock) {
+		kernel_sock_shutdown(sock, SHUT_RDWR);
+		sock_release(sock);
+	}
+	*sockp = NULL;
+	return err;
+}
+
+void setup_udp_tunnel_sock(struct net *net, struct socket *sock,
+			   struct udp_tunnel_sock_cfg *cfg)
+{
+	inet_sk(sock->sk)->mc_loop = 0;
+	encap_rcv = cfg->encap_rcv;
+	rcu_assign_sk_user_data(sock->sk, cfg->sk_user_data);
+	sock->sk->sk_data_ready = our_sk_data_ready;
+}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 16, 0)
+static inline __sum16 udp_v4_check(int len, __be32 saddr,
+				   __be32 daddr, __wsum base)
+{
+	return csum_tcpudp_magic(saddr, daddr, len, IPPROTO_UDP, base);
+}
+
+static void udp_set_csum(bool nocheck, struct sk_buff *skb,
+		  __be32 saddr, __be32 daddr, int len)
+{
+	struct udphdr *uh = udp_hdr(skb);
+
+	if (nocheck)
+		uh->check = 0;
+	else if (skb_is_gso(skb))
+		uh->check = ~udp_v4_check(len, saddr, daddr, 0);
+	else if (skb_dst(skb) && skb_dst(skb)->dev &&
+		 (skb_dst(skb)->dev->features & NETIF_F_V4_CSUM)) {
+
+		BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL);
+
+		skb->ip_summed = CHECKSUM_PARTIAL;
+		skb->csum_start = skb_transport_header(skb) - skb->head;
+		skb->csum_offset = offsetof(struct udphdr, check);
+		uh->check = ~udp_v4_check(len, saddr, daddr, 0);
+	} else {
+		__wsum csum;
+
+		BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL);
+
+		uh->check = 0;
+		csum = skb_checksum(skb, 0, len, 0);
+		uh->check = udp_v4_check(len, saddr, daddr, csum);
+		if (uh->check == 0)
+			uh->check = CSUM_MANGLED_0;
+
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+	}
+}
+
+#endif
+
+static void fake_destructor(struct sk_buff *skb)
+{
+}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0)
+static void our_iptunnel_xmit(struct rtable *rt, struct sk_buff *skb,
+		  __be32 src, __be32 dst, __u8 proto,
+		  __u8 tos, __u8 ttl, __be16 df, bool xnet)
+{
+	struct iphdr *iph;
+	struct pcpu_tstats *tstats = this_cpu_ptr(skb->dev->tstats);
+
+	skb_scrub_packet(skb, xnet);
+
+	skb->rxhash = 0;
+	skb_dst_set(skb, &rt->dst);
+	memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
+
+	/* Push down and install the IP header. */
+	skb_push(skb, sizeof(struct iphdr));
+	skb_reset_network_header(skb);
+
+	iph = ip_hdr(skb);
+
+	iph->version	=	4;
+	iph->ihl	=	sizeof(struct iphdr) >> 2;
+	iph->frag_off	=	df;
+	iph->protocol	=	proto;
+	iph->tos	=	tos;
+	iph->daddr	=	dst;
+	iph->saddr	=	src;
+	iph->ttl	=	ttl;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 53)
+	__ip_select_ident(iph, &rt->dst, (skb_shinfo(skb)->gso_segs ?: 1) - 1);
+#else
+	__ip_select_ident(iph, skb_shinfo(skb)->gso_segs ?: 1);
+#endif
+
+	iptunnel_xmit(skb, skb->dev);
+	u64_stats_update_begin(&tstats->syncp);
+	tstats->tx_bytes -= 8;
+	u64_stats_update_end(&tstats->syncp);
+}
+#define iptunnel_xmit our_iptunnel_xmit
+#endif
+
+void udp_tunnel_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb,
+			 __be32 src, __be32 dst, __u8 tos, __u8 ttl,
+			 __be16 df, __be16 src_port, __be16 dst_port,
+			 bool xnet, bool nocheck)
+{
+	struct udphdr *uh;
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)
+	struct net_device *dev = skb->dev;
+	int ret;
+#endif
+
+	__skb_push(skb, sizeof(*uh));
+	skb_reset_transport_header(skb);
+	uh = udp_hdr(skb);
+
+	uh->dest = dst_port;
+	uh->source = src_port;
+	uh->len = htons(skb->len);
+
+	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
+
+	udp_set_csum(nocheck, skb, src, dst, skb->len);
+
+	if (!skb->sk)
+		skb->sk = sk;
+	if (!skb->destructor)
+		skb->destructor = fake_destructor;
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)
+	ret =
+#endif
+	     iptunnel_xmit(
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 15, 0)
+			   sk,
+#endif
+			   rt, skb, src, dst, IPPROTO_UDP, tos, ttl, df, xnet);
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)
+	if (ret)
+		iptunnel_xmit_stats(ret - 8, &dev->stats, dev->tstats);
+#endif
+}
+
+void udp_tunnel_sock_release(struct socket *sock)
+{
+	rcu_assign_sk_user_data(sock->sk, NULL);
+	kernel_sock_shutdown(sock, SHUT_RDWR);
+	sock_release(sock);
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/socket.h>
+#include <linux/udp.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/in6.h>
+#include <net/udp.h>
+#include <net/udp_tunnel.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
+#include <net/ip6_tunnel.h>
+#include <net/ip6_checksum.h>
+
+int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg,
+		     struct socket **sockp)
+{
+	struct sockaddr_in6 udp6_addr;
+	int err;
+	struct socket *sock = NULL;
+
+	err = __sock_create(net, AF_INET6, SOCK_DGRAM, 0, &sock, 1);
+	if (err < 0)
+		goto error;
+
+	if (cfg->ipv6_v6only) {
+		int val = 1;
+
+		err = kernel_setsockopt(sock, IPPROTO_IPV6, IPV6_V6ONLY,
+					(char *) &val, sizeof(val));
+		if (err < 0)
+			goto error;
+	}
+
+	udp6_addr.sin6_family = AF_INET6;
+	memcpy(&udp6_addr.sin6_addr, &cfg->local_ip6,
+	       sizeof(udp6_addr.sin6_addr));
+	udp6_addr.sin6_port = cfg->local_udp_port;
+	err = kernel_bind(sock, (struct sockaddr *)&udp6_addr,
+			  sizeof(udp6_addr));
+	if (err < 0)
+		goto error;
+
+	if (cfg->peer_udp_port) {
+		udp6_addr.sin6_family = AF_INET6;
+		memcpy(&udp6_addr.sin6_addr, &cfg->peer_ip6,
+		       sizeof(udp6_addr.sin6_addr));
+		udp6_addr.sin6_port = cfg->peer_udp_port;
+		err = kernel_connect(sock,
+				     (struct sockaddr *)&udp6_addr,
+				     sizeof(udp6_addr), 0);
+	}
+	if (err < 0)
+		goto error;
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 16, 0)
+	sock->sk->sk_no_check = !cfg->use_udp_checksums;
+#else
+	udp_set_no_check6_tx(sock->sk, !cfg->use_udp6_tx_checksums);
+	udp_set_no_check6_rx(sock->sk, !cfg->use_udp6_rx_checksums);
+#endif
+
+	*sockp = sock;
+	return 0;
+
+error:
+	if (sock) {
+		kernel_sock_shutdown(sock, SHUT_RDWR);
+		sock_release(sock);
+	}
+	*sockp = NULL;
+	return err;
+}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 16, 0)
+static inline __sum16 udp_v6_check(int len,
+		const struct in6_addr *saddr,
+		const struct in6_addr *daddr,
+		__wsum base)
+{
+	return csum_ipv6_magic(saddr, daddr, len, IPPROTO_UDP, base);
+}
+static void udp6_set_csum(bool nocheck, struct sk_buff *skb,
+		   const struct in6_addr *saddr,
+		   const struct in6_addr *daddr, int len)
+{
+	struct udphdr *uh = udp_hdr(skb);
+
+	if (nocheck)
+		uh->check = 0;
+	else if (skb_is_gso(skb))
+		uh->check = ~udp_v6_check(len, saddr, daddr, 0);
+	else if (skb_dst(skb) && skb_dst(skb)->dev &&
+		 (skb_dst(skb)->dev->features & NETIF_F_IPV6_CSUM)) {
+
+		BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL);
+
+		skb->ip_summed = CHECKSUM_PARTIAL;
+		skb->csum_start = skb_transport_header(skb) - skb->head;
+		skb->csum_offset = offsetof(struct udphdr, check);
+		uh->check = ~udp_v6_check(len, saddr, daddr, 0);
+	} else {
+		__wsum csum;
+
+		BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL);
+
+		uh->check = 0;
+		csum = skb_checksum(skb, 0, len, 0);
+		uh->check = udp_v6_check(len, saddr, daddr, csum);
+		if (uh->check == 0)
+			uh->check = CSUM_MANGLED_0;
+
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+	}
+}
+#endif
+
+int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk,
+			 struct sk_buff *skb,
+			 struct net_device *dev, struct in6_addr *saddr,
+			 struct in6_addr *daddr,
+			 __u8 prio, __u8 ttl, __be32 label,
+			 __be16 src_port, __be16 dst_port, bool nocheck)
+{
+	struct udphdr *uh;
+	struct ipv6hdr *ip6h;
+
+	__skb_push(skb, sizeof(*uh));
+	skb_reset_transport_header(skb);
+	uh = udp_hdr(skb);
+
+	uh->dest = dst_port;
+	uh->source = src_port;
+
+	uh->len = htons(skb->len);
+
+	skb_dst_set(skb, dst);
+
+	udp6_set_csum(nocheck, skb, saddr, daddr, skb->len);
+
+	__skb_push(skb, sizeof(*ip6h));
+	skb_reset_network_header(skb);
+	ip6h		  = ipv6_hdr(skb);
+	ip6_flow_hdr(ip6h, prio, label);
+	ip6h->payload_len = htons(skb->len);
+	ip6h->nexthdr     = IPPROTO_UDP;
+	ip6h->hop_limit   = ttl;
+	ip6h->daddr	  = *daddr;
+	ip6h->saddr	  = *saddr;
+
+	if (!skb->sk)
+		skb->sk = sk;
+	if (!skb->destructor)
+		skb->destructor = fake_destructor;
+
+	ip6tunnel_xmit(skb, dev);
+	return 0;
+}
+#endif
diff --git b/net/wireguard/compat/udp_tunnel/udp_tunnel_partial_compat.h b/net/wireguard/compat/udp_tunnel/udp_tunnel_partial_compat.h
new file mode 100644
index 0000000..230b3cd
--- /dev/null
+++ b/net/wireguard/compat/udp_tunnel/udp_tunnel_partial_compat.h
@@ -0,0 +1,226 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 18, 0)
+#define udp_sock_create4 udp_sock_create
+#define udp_sock_create6 udp_sock_create
+#include <linux/socket.h>
+#include <linux/if.h>
+#include <linux/in.h>
+#include <net/ip_tunnels.h>
+#include <net/udp.h>
+#include <net/inet_common.h>
+#if IS_ENABLED(CONFIG_IPV6)
+#include <linux/in6.h>
+#include <net/ipv6.h>
+#include <net/addrconf.h>
+#include <net/ip6_checksum.h>
+#include <net/ip6_tunnel.h>
+#endif
+static inline void fake_destructor(struct sk_buff *skb)
+{
+}
+typedef int (*udp_tunnel_encap_rcv_t)(struct sock *sk, struct sk_buff *skb);
+struct udp_tunnel_sock_cfg {
+        void *sk_user_data;
+        __u8  encap_type;
+        udp_tunnel_encap_rcv_t encap_rcv;
+};
+/* This is global so, uh, only one real call site... This is the kind of horrific hack you'd expect to see in compat code. */
+static udp_tunnel_encap_rcv_t encap_rcv = NULL;
+static void our_sk_data_ready(struct sock *sk)
+{
+	struct sk_buff *skb;
+	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
+		skb_orphan(skb);
+		sk_mem_reclaim(sk);
+		encap_rcv(sk, skb);
+	}
+}
+static inline void setup_udp_tunnel_sock(struct net *net, struct socket *sock,
+                           struct udp_tunnel_sock_cfg *cfg)
+{
+	struct sock *sk = sock->sk;
+	inet_sk(sk)->mc_loop = 0;
+	encap_rcv = cfg->encap_rcv;
+	rcu_assign_sk_user_data(sk, cfg->sk_user_data);
+	sk->sk_data_ready = our_sk_data_ready;
+}
+static inline void udp_tunnel_sock_release(struct socket *sock)
+{
+	rcu_assign_sk_user_data(sock->sk, NULL);
+	kernel_sock_shutdown(sock, SHUT_RDWR);
+	sk_release_kernel(sock->sk);
+}
+static inline int udp_tunnel_xmit_skb(struct socket *sock, struct rtable *rt,
+                        struct sk_buff *skb, __be32 src, __be32 dst,
+                        __u8 tos, __u8 ttl, __be16 df, __be16 src_port,
+                        __be16 dst_port, bool xnet)
+{
+	struct udphdr *uh;
+	__skb_push(skb, sizeof(*uh));
+	skb_reset_transport_header(skb);
+	uh = udp_hdr(skb);
+	uh->dest = dst_port;
+	uh->source = src_port;
+	uh->len = htons(skb->len);
+	udp_set_csum(sock->sk->sk_no_check_tx, skb, src, dst, skb->len);
+	return iptunnel_xmit(sock->sk, rt, skb, src, dst, IPPROTO_UDP,
+			     tos, ttl, df, xnet);
+}
+#if IS_ENABLED(CONFIG_IPV6)
+static inline int udp_tunnel6_xmit_skb(struct socket *sock, struct dst_entry *dst,
+                         struct sk_buff *skb, struct net_device *dev,
+                         struct in6_addr *saddr, struct in6_addr *daddr,
+                         __u8 prio, __u8 ttl, __be16 src_port,
+                         __be16 dst_port)
+{
+	struct udphdr *uh;
+	struct ipv6hdr *ip6h;
+	struct sock *sk = sock->sk;
+	__skb_push(skb, sizeof(*uh));
+	skb_reset_transport_header(skb);
+	uh = udp_hdr(skb);
+	uh->dest = dst_port;
+	uh->source = src_port;
+	uh->len = htons(skb->len);
+	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
+	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED
+			    | IPSKB_REROUTED);
+	skb_dst_set(skb, dst);
+	udp6_set_csum(udp_get_no_check6_tx(sk), skb, &inet6_sk(sk)->saddr,
+	              &sk->sk_v6_daddr, skb->len);
+	__skb_push(skb, sizeof(*ip6h));
+	skb_reset_network_header(skb);
+	ip6h		  = ipv6_hdr(skb);
+	ip6_flow_hdr(ip6h, prio, htonl(0));
+	ip6h->payload_len = htons(skb->len);
+	ip6h->nexthdr     = IPPROTO_UDP;
+	ip6h->hop_limit   = ttl;
+	ip6h->daddr	  = *daddr;
+	ip6h->saddr	  = *saddr;
+	ip6tunnel_xmit(skb, dev);
+	return 0;
+}
+#endif
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 0, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0)
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/udp.h>
+#include <linux/skbuff.h>
+#include <linux/if.h>
+#include <net/udp_tunnel.h>
+#define udp_tunnel_xmit_skb(a, b, c, d, e, f, g, h, i, j, k, l) do { struct net_device *dev__ = (c)->dev; int ret__; ret__ = udp_tunnel_xmit_skb((b)->sk_socket, a, c, d, e, f, g, h, i, j, k); if (ret__) iptunnel_xmit_stats(ret__ - 8, &dev__->stats, dev__->tstats); } while (0)
+#if IS_ENABLED(CONFIG_IPV6)
+#define udp_tunnel6_xmit_skb(a, b, c, d, e, f, g, h, i, j, k, l) udp_tunnel6_xmit_skb((b)->sk_socket, a, c, d, e, f, g, h, j, k);
+#endif
+#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 1, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0)
+#include <linux/if.h>
+#include <net/udp_tunnel.h>
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 18, 0)
+static inline void fake_destructor(struct sk_buff *skb)
+{
+}
+#endif
+#define udp_tunnel_xmit_skb(a, b, c, d, e, f, g, h, i, j, k, l) do { struct net_device *dev__ = (c)->dev; int ret__; if (!(c)->destructor) (c)->destructor = fake_destructor; if (!(c)->sk) (c)->sk = (b); ret__ = udp_tunnel_xmit_skb(a, c, d, e, f, g, h, i, j, k, l); if (ret__) iptunnel_xmit_stats(ret__ - 8, &dev__->stats, dev__->tstats); } while (0)
+#if IS_ENABLED(CONFIG_IPV6)
+#define udp_tunnel6_xmit_skb(a, b, c, d, e, f, g, h, i, j, k, l) do { if (!(c)->destructor) (c)->destructor = fake_destructor; if (!(c)->sk) (c)->sk = (b); udp_tunnel6_xmit_skb(a, c, d, e, f, g, h, j, k, l); } while(0)
+#endif
+#else
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0)
+#include <linux/if.h>
+#include <net/udp_tunnel.h>
+#define udp_tunnel_xmit_skb(a, b, c, d, e, f, g, h, i, j, k, l) do { struct net_device *dev__ = (c)->dev; int ret__ = udp_tunnel_xmit_skb(a, b, c, d, e, f, g, h, i, j, k, l); if (ret__) iptunnel_xmit_stats(ret__ - 8, &dev__->stats, dev__->tstats); } while (0)
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 5, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(4, 3, 0)
+#include <linux/if.h>
+#include <net/udp_tunnel.h>
+#define udp_tunnel_xmit_skb(a, b, c, d, e, f, g, h, i, j, k, l) do { struct net_device *dev__ = (c)->dev; int ret__ = udp_tunnel_xmit_skb(a, b, c, d, e, f, g, h, i, j, k, l); iptunnel_xmit_stats(ret__, &dev__->stats, dev__->tstats); } while (0)
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) && IS_ENABLED(CONFIG_IPV6) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0)
+#include <linux/if.h>
+#include <net/udp_tunnel.h>
+#define udp_tunnel6_xmit_skb(a, b, c, d, e, f, g, h, i, j, k, l) udp_tunnel6_xmit_skb(a, b, c, d, e, f, g, h, j, k, l)
+#endif
+
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0)
+#include <linux/skbuff.h>
+#include <linux/if.h>
+#include <net/udp_tunnel.h>
+struct udp_port_cfg_new {
+	u8 family;
+	union {
+		struct in_addr local_ip;
+#if IS_ENABLED(CONFIG_IPV6)
+		struct in6_addr local_ip6;
+#endif
+	};
+	union {
+		struct in_addr peer_ip;
+#if IS_ENABLED(CONFIG_IPV6)
+		struct in6_addr peer_ip6;
+#endif
+	};
+	__be16 local_udp_port;
+	__be16 peer_udp_port;
+	unsigned int use_udp_checksums:1, use_udp6_tx_checksums:1, use_udp6_rx_checksums:1, ipv6_v6only:1;
+};
+static inline int __maybe_unused udp_sock_create_new(struct net *net, struct udp_port_cfg_new *cfg, struct socket **sockp)
+{
+	struct udp_port_cfg old_cfg = {
+		.family = cfg->family,
+		.local_ip = cfg->local_ip,
+#if IS_ENABLED(CONFIG_IPV6)
+		.local_ip6 = cfg->local_ip6,
+#endif
+		.peer_ip = cfg->peer_ip,
+#if IS_ENABLED(CONFIG_IPV6)
+		.peer_ip6 = cfg->peer_ip6,
+#endif
+		.local_udp_port = cfg->local_udp_port,
+		.peer_udp_port = cfg->peer_udp_port,
+		.use_udp_checksums = cfg->use_udp_checksums,
+		.use_udp6_tx_checksums = cfg->use_udp6_tx_checksums,
+		.use_udp6_rx_checksums = cfg->use_udp6_rx_checksums
+	};
+	if (cfg->family == AF_INET)
+		return udp_sock_create4(net, &old_cfg, sockp);
+
+#if IS_ENABLED(CONFIG_IPV6)
+	if (cfg->family == AF_INET6) {
+		int ret;
+		int old_bindv6only;
+		struct net *nobns;
+
+		if (cfg->ipv6_v6only) {
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 2, 0)
+			nobns = &init_net;
+#else
+			nobns = net;
+#endif
+			/* Since udp_port_cfg only learned of ipv6_v6only in 4.3, we do this horrible
+			 * hack here and set the sysctl variable temporarily to something that will
+			 * set the right option for us in sock_create. It's super racey! */
+			old_bindv6only = nobns->ipv6.sysctl.bindv6only;
+			nobns->ipv6.sysctl.bindv6only = 1;
+		}
+		ret = udp_sock_create6(net, &old_cfg, sockp);
+		if (cfg->ipv6_v6only)
+			nobns->ipv6.sysctl.bindv6only = old_bindv6only;
+		return ret;
+	}
+#endif
+	return -EPFNOSUPPORT;
+}
+#define udp_port_cfg udp_port_cfg_new
+#define udp_sock_create(a, b, c) udp_sock_create_new(a, b, c)
+#endif
diff --git b/net/wireguard/cookie.c b/net/wireguard/cookie.c
new file mode 100644
index 0000000..fdecb71
--- /dev/null
+++ b/net/wireguard/cookie.c
@@ -0,0 +1,195 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#include "cookie.h"
+#include "peer.h"
+#include "device.h"
+#include "messages.h"
+#include "ratelimiter.h"
+#include "crypto/blake2s.h"
+#include "crypto/chacha20poly1305.h"
+
+#include <linux/jiffies.h>
+#include <net/ipv6.h>
+#include <crypto/algapi.h>
+
+void cookie_checker_init(struct cookie_checker *checker, struct wireguard_device *wg)
+{
+	init_rwsem(&checker->secret_lock);
+	checker->secret_birthdate = get_jiffies_64();
+	get_random_bytes(checker->secret, NOISE_HASH_LEN);
+	checker->device = wg;
+}
+
+enum { COOKIE_KEY_LABEL_LEN = 8 };
+static const u8 mac1_key_label[COOKIE_KEY_LABEL_LEN] = "mac1----";
+static const u8 cookie_key_label[COOKIE_KEY_LABEL_LEN] = "cookie--";
+
+static void precompute_key(u8 key[NOISE_SYMMETRIC_KEY_LEN], const u8 pubkey[NOISE_PUBLIC_KEY_LEN], const u8 label[COOKIE_KEY_LABEL_LEN])
+{
+	struct blake2s_state blake;
+
+	blake2s_init(&blake, NOISE_SYMMETRIC_KEY_LEN);
+	blake2s_update(&blake, label, COOKIE_KEY_LABEL_LEN);
+	blake2s_update(&blake, pubkey, NOISE_PUBLIC_KEY_LEN);
+	blake2s_final(&blake, key, NOISE_SYMMETRIC_KEY_LEN);
+}
+
+void cookie_checker_precompute_device_keys(struct cookie_checker *checker)
+{
+	down_read(&checker->device->static_identity.lock);
+	if (likely(checker->device->static_identity.has_identity)) {
+		precompute_key(checker->cookie_encryption_key, checker->device->static_identity.static_public, cookie_key_label);
+		precompute_key(checker->message_mac1_key, checker->device->static_identity.static_public, mac1_key_label);
+	} else {
+		memset(checker->cookie_encryption_key, 0, NOISE_SYMMETRIC_KEY_LEN);
+		memset(checker->message_mac1_key, 0, NOISE_SYMMETRIC_KEY_LEN);
+	}
+	up_read(&checker->device->static_identity.lock);
+}
+
+void cookie_checker_precompute_peer_keys(struct wireguard_peer *peer)
+{
+	precompute_key(peer->latest_cookie.cookie_decryption_key, peer->handshake.remote_static, cookie_key_label);
+	precompute_key(peer->latest_cookie.message_mac1_key, peer->handshake.remote_static, mac1_key_label);
+}
+
+void cookie_init(struct cookie *cookie)
+{
+	memset(cookie, 0, sizeof(struct cookie));
+	init_rwsem(&cookie->lock);
+}
+
+static void compute_mac1(u8 mac1[COOKIE_LEN], const void *message, size_t len, const u8 key[NOISE_SYMMETRIC_KEY_LEN])
+{
+	len = len - sizeof(struct message_macs) + offsetof(struct message_macs, mac1);
+	blake2s(mac1, message, key, COOKIE_LEN, len, NOISE_SYMMETRIC_KEY_LEN);
+}
+
+static void compute_mac2(u8 mac2[COOKIE_LEN], const void *message, size_t len, const u8 cookie[COOKIE_LEN])
+{
+	len = len - sizeof(struct message_macs) + offsetof(struct message_macs, mac2);
+	blake2s(mac2, message, cookie, COOKIE_LEN, len, COOKIE_LEN);
+}
+
+static void make_cookie(u8 cookie[COOKIE_LEN], struct sk_buff *skb, struct cookie_checker *checker)
+{
+	struct blake2s_state state;
+
+	if (!time_is_after_jiffies64(checker->secret_birthdate + COOKIE_SECRET_MAX_AGE)) {
+		down_write(&checker->secret_lock);
+		checker->secret_birthdate = get_jiffies_64();
+		get_random_bytes(checker->secret, NOISE_HASH_LEN);
+		up_write(&checker->secret_lock);
+	}
+
+	down_read(&checker->secret_lock);
+
+	blake2s_init_key(&state, COOKIE_LEN, checker->secret, NOISE_HASH_LEN);
+	if (skb->protocol == htons(ETH_P_IP))
+		blake2s_update(&state, (u8 *)&ip_hdr(skb)->saddr, sizeof(struct in_addr));
+	else if (skb->protocol == htons(ETH_P_IPV6))
+		blake2s_update(&state, (u8 *)&ipv6_hdr(skb)->saddr, sizeof(struct in6_addr));
+	blake2s_update(&state, (u8 *)&udp_hdr(skb)->source, sizeof(__be16));
+	blake2s_final(&state, cookie, COOKIE_LEN);
+
+	up_read(&checker->secret_lock);
+}
+
+enum cookie_mac_state cookie_validate_packet(struct cookie_checker *checker, struct sk_buff *skb, bool check_cookie)
+{
+	u8 computed_mac[COOKIE_LEN];
+	u8 cookie[COOKIE_LEN];
+	enum cookie_mac_state ret;
+	struct message_macs *macs = (struct message_macs *)(skb->data + skb->len - sizeof(struct message_macs));
+
+	ret = INVALID_MAC;
+	compute_mac1(computed_mac, skb->data, skb->len, checker->message_mac1_key);
+	if (crypto_memneq(computed_mac, macs->mac1, COOKIE_LEN))
+		goto out;
+
+	ret = VALID_MAC_BUT_NO_COOKIE;
+
+	if (!check_cookie)
+		goto out;
+
+	make_cookie(cookie, skb, checker);
+
+	compute_mac2(computed_mac, skb->data, skb->len, cookie);
+	if (crypto_memneq(computed_mac, macs->mac2, COOKIE_LEN))
+		goto out;
+
+	ret = VALID_MAC_WITH_COOKIE_BUT_RATELIMITED;
+	if (!ratelimiter_allow(skb, dev_net(checker->device->dev)))
+		goto out;
+
+	ret = VALID_MAC_WITH_COOKIE;
+
+out:
+	return ret;
+}
+
+void cookie_add_mac_to_packet(void *message, size_t len, struct wireguard_peer *peer)
+{
+	struct message_macs *macs = (struct message_macs *)((u8 *)message + len - sizeof(struct message_macs));
+
+	down_write(&peer->latest_cookie.lock);
+	compute_mac1(macs->mac1, message, len, peer->latest_cookie.message_mac1_key);
+	memcpy(peer->latest_cookie.last_mac1_sent, macs->mac1, COOKIE_LEN);
+	peer->latest_cookie.have_sent_mac1 = true;
+	up_write(&peer->latest_cookie.lock);
+
+	down_read(&peer->latest_cookie.lock);
+	if (peer->latest_cookie.is_valid && time_is_after_jiffies64(peer->latest_cookie.birthdate + COOKIE_SECRET_MAX_AGE - COOKIE_SECRET_LATENCY))
+		compute_mac2(macs->mac2, message, len, peer->latest_cookie.cookie);
+	else
+		memset(macs->mac2, 0, COOKIE_LEN);
+	up_read(&peer->latest_cookie.lock);
+}
+
+void cookie_message_create(struct message_handshake_cookie *dst, struct sk_buff *skb, __le32 index, struct cookie_checker *checker)
+{
+	struct message_macs *macs = (struct message_macs *)((u8 *)skb->data + skb->len - sizeof(struct message_macs));
+	u8 cookie[COOKIE_LEN];
+
+	dst->header.type = cpu_to_le32(MESSAGE_HANDSHAKE_COOKIE);
+	dst->receiver_index = index;
+	get_random_bytes_wait(dst->nonce, COOKIE_NONCE_LEN);
+
+	make_cookie(cookie, skb, checker);
+	xchacha20poly1305_encrypt(dst->encrypted_cookie, cookie, COOKIE_LEN, macs->mac1, COOKIE_LEN, dst->nonce, checker->cookie_encryption_key);
+}
+
+void cookie_message_consume(struct message_handshake_cookie *src, struct wireguard_device *wg)
+{
+	u8 cookie[COOKIE_LEN];
+	struct index_hashtable_entry *entry;
+	bool ret;
+
+	entry = index_hashtable_lookup(&wg->index_hashtable, INDEX_HASHTABLE_HANDSHAKE | INDEX_HASHTABLE_KEYPAIR, src->receiver_index);
+	if (unlikely(!entry))
+		return;
+
+	down_read(&entry->peer->latest_cookie.lock);
+	if (unlikely(!entry->peer->latest_cookie.have_sent_mac1)) {
+		up_read(&entry->peer->latest_cookie.lock);
+		goto out;
+	}
+	ret = xchacha20poly1305_decrypt(cookie, src->encrypted_cookie, sizeof(src->encrypted_cookie), entry->peer->latest_cookie.last_mac1_sent, COOKIE_LEN, src->nonce, entry->peer->latest_cookie.cookie_decryption_key);
+	up_read(&entry->peer->latest_cookie.lock);
+
+	if (ret) {
+		down_write(&entry->peer->latest_cookie.lock);
+		memcpy(entry->peer->latest_cookie.cookie, cookie, COOKIE_LEN);
+		entry->peer->latest_cookie.birthdate = get_jiffies_64();
+		entry->peer->latest_cookie.is_valid = true;
+		entry->peer->latest_cookie.have_sent_mac1 = false;
+		up_write(&entry->peer->latest_cookie.lock);
+	} else
+		net_dbg_ratelimited("%s: Could not decrypt invalid cookie response\n", wg->dev->name);
+
+out:
+	peer_put(entry->peer);
+}
diff --git b/net/wireguard/cookie.h b/net/wireguard/cookie.h
new file mode 100644
index 0000000..352e9c9
--- /dev/null
+++ b/net/wireguard/cookie.h
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#ifndef _WG_COOKIE_H
+#define _WG_COOKIE_H
+
+#include "messages.h"
+#include <linux/rwsem.h>
+
+struct wireguard_peer;
+
+struct cookie_checker {
+	u8 secret[NOISE_HASH_LEN];
+	u8 cookie_encryption_key[NOISE_SYMMETRIC_KEY_LEN];
+	u8 message_mac1_key[NOISE_SYMMETRIC_KEY_LEN];
+	u64 secret_birthdate;
+	struct rw_semaphore secret_lock;
+	struct wireguard_device *device;
+};
+
+struct cookie {
+	u64 birthdate;
+	bool is_valid;
+	u8 cookie[COOKIE_LEN];
+	bool have_sent_mac1;
+	u8 last_mac1_sent[COOKIE_LEN];
+	u8 cookie_decryption_key[NOISE_SYMMETRIC_KEY_LEN];
+	u8 message_mac1_key[NOISE_SYMMETRIC_KEY_LEN];
+	struct rw_semaphore lock;
+};
+
+enum cookie_mac_state {
+	INVALID_MAC,
+	VALID_MAC_BUT_NO_COOKIE,
+	VALID_MAC_WITH_COOKIE_BUT_RATELIMITED,
+	VALID_MAC_WITH_COOKIE
+};
+
+void cookie_checker_init(struct cookie_checker *checker, struct wireguard_device *wg);
+void cookie_checker_precompute_device_keys(struct cookie_checker *checker);
+void cookie_checker_precompute_peer_keys(struct wireguard_peer *peer);
+void cookie_init(struct cookie *cookie);
+
+enum cookie_mac_state cookie_validate_packet(struct cookie_checker *checker, struct sk_buff *skb, bool check_cookie);
+void cookie_add_mac_to_packet(void *message, size_t len, struct wireguard_peer *peer);
+
+void cookie_message_create(struct message_handshake_cookie *src, struct sk_buff *skb, __le32 index, struct cookie_checker *checker);
+void cookie_message_consume(struct message_handshake_cookie *src, struct wireguard_device *wg);
+
+#endif /* _WG_COOKIE_H */
diff --git b/net/wireguard/crypto/blake2s-x86_64.S b/net/wireguard/crypto/blake2s-x86_64.S
new file mode 100644
index 0000000..4a8ab75
--- /dev/null
+++ b/net/wireguard/crypto/blake2s-x86_64.S
@@ -0,0 +1,685 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ * Copyright (C) 2017 Samuel Neves <sneves@dei.uc.pt>. All Rights Reserved.
+ */
+
+#include <linux/linkage.h>
+
+.section .rodata.cst32.BLAKE2S_IV, "aM", @progbits, 32
+.align 32
+IV:	.octa 0xA54FF53A3C6EF372BB67AE856A09E667
+	.octa 0x5BE0CD191F83D9AB9B05688C510E527F
+.section .rodata.cst16.ROT16, "aM", @progbits, 16
+.align 16
+ROT16:	.octa 0x0D0C0F0E09080B0A0504070601000302
+.section .rodata.cst16.ROR328, "aM", @progbits, 16
+.align 16
+ROR328:	.octa 0x0C0F0E0D080B0A090407060500030201
+#ifdef CONFIG_AS_AVX512
+.section .rodata.cst64.BLAKE2S_SIGMA, "aM", @progbits, 640
+.align 64
+SIGMA:
+.long 0, 2, 4, 6, 1, 3, 5, 7, 8, 10, 12, 14, 9, 11, 13, 15
+.long 11, 2, 12, 14, 9, 8, 15, 3, 4, 0, 13, 6, 10, 1, 7, 5
+.long 10, 12, 11, 6, 5, 9, 13, 3, 4, 15, 14, 2, 0, 7, 8, 1
+.long 10, 9, 7, 0, 11, 14, 1, 12, 6, 2, 15, 3, 13, 8, 5, 4
+.long 4, 9, 8, 13, 14, 0, 10, 11, 7, 3, 12, 1, 5, 6, 15, 2
+.long 2, 10, 4, 14, 13, 3, 9, 11, 6, 5, 7, 12, 15, 1, 8, 0
+.long 4, 11, 14, 8, 13, 10, 12, 5, 2, 1, 15, 3, 9, 7, 0, 6
+.long 6, 12, 0, 13, 15, 2, 1, 10, 4, 5, 11, 14, 8, 3, 9, 7
+.long 14, 5, 4, 12, 9, 7, 3, 10, 2, 0, 6, 15, 11, 1, 13, 8
+.long 11, 7, 13, 10, 12, 14, 0, 15, 4, 5, 6, 9, 2, 1, 8, 3
+#endif /* CONFIG_AS_AVX512 */
+
+.text
+#ifdef CONFIG_AS_AVX
+ENTRY(blake2s_compress_avx)
+	movl		%ecx, %ecx
+	testq		%rdx, %rdx
+	je .Lendofloop
+	.align 32
+.Lbeginofloop:
+	addq		%rcx, 32(%rdi)
+	vmovdqu		IV+16(%rip), %xmm1
+	vmovdqu		(%rsi), %xmm4
+	vpxor		32(%rdi), %xmm1, %xmm1
+	vmovdqu		16(%rsi), %xmm3
+	vshufps		$136, %xmm3, %xmm4, %xmm6
+	vmovdqa		ROT16(%rip), %xmm7
+	vpaddd		(%rdi), %xmm6, %xmm6
+	vpaddd		16(%rdi), %xmm6, %xmm6
+	vpxor		%xmm6, %xmm1, %xmm1
+	vmovdqu		IV(%rip), %xmm8
+	vpshufb		%xmm7, %xmm1, %xmm1
+	vmovdqu		48(%rsi), %xmm5
+	vpaddd		%xmm1, %xmm8, %xmm8
+	vpxor		16(%rdi), %xmm8, %xmm9
+	vmovdqu		32(%rsi), %xmm2
+	vpblendw	$12, %xmm3, %xmm5, %xmm13
+	vshufps		$221, %xmm5, %xmm2, %xmm12
+	vpunpckhqdq	%xmm2, %xmm4, %xmm14
+	vpslld		$20, %xmm9, %xmm0
+	vpsrld		$12, %xmm9, %xmm9
+	vpxor		%xmm0, %xmm9, %xmm0
+	vshufps		$221, %xmm3, %xmm4, %xmm9
+	vpaddd		%xmm9, %xmm6, %xmm9
+	vpaddd		%xmm0, %xmm9, %xmm9
+	vpxor		%xmm9, %xmm1, %xmm1
+	vmovdqa		ROR328(%rip), %xmm6
+	vpshufb		%xmm6, %xmm1, %xmm1
+	vpaddd		%xmm1, %xmm8, %xmm8
+	vpxor		%xmm8, %xmm0, %xmm0
+	vpshufd		$147, %xmm1, %xmm1
+	vpshufd		$78, %xmm8, %xmm8
+	vpslld		$25, %xmm0, %xmm10
+	vpsrld		$7, %xmm0, %xmm0
+	vpxor		%xmm10, %xmm0, %xmm0
+	vshufps		$136, %xmm5, %xmm2, %xmm10
+	vpshufd		$57, %xmm0, %xmm0
+	vpaddd		%xmm10, %xmm9, %xmm9
+	vpaddd		%xmm0, %xmm9, %xmm9
+	vpxor		%xmm9, %xmm1, %xmm1
+	vpaddd		%xmm12, %xmm9, %xmm9
+	vpblendw	$12, %xmm2, %xmm3, %xmm12
+	vpshufb		%xmm7, %xmm1, %xmm1
+	vpaddd		%xmm1, %xmm8, %xmm8
+	vpxor		%xmm8, %xmm0, %xmm10
+	vpslld		$20, %xmm10, %xmm0
+	vpsrld		$12, %xmm10, %xmm10
+	vpxor		%xmm0, %xmm10, %xmm0
+	vpaddd		%xmm0, %xmm9, %xmm9
+	vpxor		%xmm9, %xmm1, %xmm1
+	vpshufb		%xmm6, %xmm1, %xmm1
+	vpaddd		%xmm1, %xmm8, %xmm8
+	vpxor		%xmm8, %xmm0, %xmm0
+	vpshufd		$57, %xmm1, %xmm1
+	vpshufd		$78, %xmm8, %xmm8
+	vpslld		$25, %xmm0, %xmm10
+	vpsrld		$7, %xmm0, %xmm0
+	vpxor		%xmm10, %xmm0, %xmm0
+	vpslldq		$4, %xmm5, %xmm10
+	vpblendw	$240, %xmm10, %xmm12, %xmm12
+	vpshufd		$147, %xmm0, %xmm0
+	vpshufd		$147, %xmm12, %xmm12
+	vpaddd		%xmm9, %xmm12, %xmm12
+	vpaddd		%xmm0, %xmm12, %xmm12
+	vpxor		%xmm12, %xmm1, %xmm1
+	vpshufb		%xmm7, %xmm1, %xmm1
+	vpaddd		%xmm1, %xmm8, %xmm8
+	vpxor		%xmm8, %xmm0, %xmm11
+	vpslld		$20, %xmm11, %xmm9
+	vpsrld		$12, %xmm11, %xmm11
+	vpxor		%xmm9, %xmm11, %xmm0
+	vpshufd		$8, %xmm2, %xmm9
+	vpblendw	$192, %xmm5, %xmm3, %xmm11
+	vpblendw	$240, %xmm11, %xmm9, %xmm9
+	vpshufd		$177, %xmm9, %xmm9
+	vpaddd		%xmm12, %xmm9, %xmm9
+	vpaddd		%xmm0, %xmm9, %xmm11
+	vpxor		%xmm11, %xmm1, %xmm1
+	vpshufb		%xmm6, %xmm1, %xmm1
+	vpaddd		%xmm1, %xmm8, %xmm8
+	vpxor		%xmm8, %xmm0, %xmm9
+	vpshufd		$147, %xmm1, %xmm1
+	vpshufd		$78, %xmm8, %xmm8
+	vpslld		$25, %xmm9, %xmm0
+	vpsrld		$7, %xmm9, %xmm9
+	vpxor		%xmm0, %xmm9, %xmm0
+	vpslldq		$4, %xmm3, %xmm9
+	vpblendw	$48, %xmm9, %xmm2, %xmm9
+	vpblendw	$240, %xmm9, %xmm4, %xmm9
+	vpshufd		$57, %xmm0, %xmm0
+	vpshufd		$177, %xmm9, %xmm9
+	vpaddd		%xmm11, %xmm9, %xmm9
+	vpaddd		%xmm0, %xmm9, %xmm9
+	vpxor		%xmm9, %xmm1, %xmm1
+	vpshufb		%xmm7, %xmm1, %xmm1
+	vpaddd		%xmm1, %xmm8, %xmm11
+	vpxor		%xmm11, %xmm0, %xmm0
+	vpslld		$20, %xmm0, %xmm8
+	vpsrld		$12, %xmm0, %xmm0
+	vpxor		%xmm8, %xmm0, %xmm0
+	vpunpckhdq	%xmm3, %xmm4, %xmm8
+	vpblendw	$12, %xmm10, %xmm8, %xmm12
+	vpshufd		$177, %xmm12, %xmm12
+	vpaddd		%xmm9, %xmm12, %xmm9
+	vpaddd		%xmm0, %xmm9, %xmm9
+	vpxor		%xmm9, %xmm1, %xmm1
+	vpshufb		%xmm6, %xmm1, %xmm1
+	vpaddd		%xmm1, %xmm11, %xmm11
+	vpxor		%xmm11, %xmm0, %xmm0
+	vpshufd		$57, %xmm1, %xmm1
+	vpshufd		$78, %xmm11, %xmm11
+	vpslld		$25, %xmm0, %xmm12
+	vpsrld		$7, %xmm0, %xmm0
+	vpxor		%xmm12, %xmm0, %xmm0
+	vpunpckhdq	%xmm5, %xmm2, %xmm12
+	vpshufd		$147, %xmm0, %xmm0
+	vpblendw	$15, %xmm13, %xmm12, %xmm12
+	vpslldq		$8, %xmm5, %xmm13
+	vpshufd		$210, %xmm12, %xmm12
+	vpaddd		%xmm9, %xmm12, %xmm9
+	vpaddd		%xmm0, %xmm9, %xmm9
+	vpxor		%xmm9, %xmm1, %xmm1
+	vpshufb		%xmm7, %xmm1, %xmm1
+	vpaddd		%xmm1, %xmm11, %xmm11
+	vpxor		%xmm11, %xmm0, %xmm0
+	vpslld		$20, %xmm0, %xmm12
+	vpsrld		$12, %xmm0, %xmm0
+	vpxor		%xmm12, %xmm0, %xmm0
+	vpunpckldq	%xmm4, %xmm2, %xmm12
+	vpblendw	$240, %xmm4, %xmm12, %xmm12
+	vpblendw	$192, %xmm13, %xmm12, %xmm12
+	vpsrldq		$12, %xmm3, %xmm13
+	vpaddd		%xmm12, %xmm9, %xmm9
+	vpaddd		%xmm0, %xmm9, %xmm9
+	vpxor		%xmm9, %xmm1, %xmm1
+	vpshufb		%xmm6, %xmm1, %xmm1
+	vpaddd		%xmm1, %xmm11, %xmm11
+	vpxor		%xmm11, %xmm0, %xmm0
+	vpshufd		$147, %xmm1, %xmm1
+	vpshufd		$78, %xmm11, %xmm11
+	vpslld		$25, %xmm0, %xmm12
+	vpsrld		$7, %xmm0, %xmm0
+	vpxor		%xmm12, %xmm0, %xmm0
+	vpblendw	$60, %xmm2, %xmm4, %xmm12
+	vpblendw	$3, %xmm13, %xmm12, %xmm12
+	vpshufd		$57, %xmm0, %xmm0
+	vpshufd		$78, %xmm12, %xmm12
+	vpaddd		%xmm9, %xmm12, %xmm9
+	vpaddd		%xmm0, %xmm9, %xmm9
+	vpxor		%xmm9, %xmm1, %xmm1
+	vpshufb		%xmm7, %xmm1, %xmm1
+	vpaddd		%xmm1, %xmm11, %xmm11
+	vpxor		%xmm11, %xmm0, %xmm12
+	vpslld		$20, %xmm12, %xmm13
+	vpsrld		$12, %xmm12, %xmm0
+	vpblendw	$51, %xmm3, %xmm4, %xmm12
+	vpxor		%xmm13, %xmm0, %xmm0
+	vpblendw	$192, %xmm10, %xmm12, %xmm10
+	vpslldq		$8, %xmm2, %xmm12
+	vpshufd		$27, %xmm10, %xmm10
+	vpaddd		%xmm9, %xmm10, %xmm9
+	vpaddd		%xmm0, %xmm9, %xmm9
+	vpxor		%xmm9, %xmm1, %xmm1
+	vpshufb		%xmm6, %xmm1, %xmm1
+	vpaddd		%xmm1, %xmm11, %xmm11
+	vpxor		%xmm11, %xmm0, %xmm0
+	vpshufd		$57, %xmm1, %xmm1
+	vpshufd		$78, %xmm11, %xmm11
+	vpslld		$25, %xmm0, %xmm10
+	vpsrld		$7, %xmm0, %xmm0
+	vpxor		%xmm10, %xmm0, %xmm0
+	vpunpckhdq	%xmm2, %xmm8, %xmm10
+	vpshufd		$147, %xmm0, %xmm0
+	vpblendw	$12, %xmm5, %xmm10, %xmm10
+	vpshufd		$210, %xmm10, %xmm10
+	vpaddd		%xmm9, %xmm10, %xmm9
+	vpaddd		%xmm0, %xmm9, %xmm9
+	vpxor		%xmm9, %xmm1, %xmm1
+	vpshufb		%xmm7, %xmm1, %xmm1
+	vpaddd		%xmm1, %xmm11, %xmm11
+	vpxor		%xmm11, %xmm0, %xmm10
+	vpslld		$20, %xmm10, %xmm0
+	vpsrld		$12, %xmm10, %xmm10
+	vpxor		%xmm0, %xmm10, %xmm0
+	vpblendw	$12, %xmm4, %xmm5, %xmm10
+	vpblendw	$192, %xmm12, %xmm10, %xmm10
+	vpunpckldq	%xmm2, %xmm4, %xmm12
+	vpshufd		$135, %xmm10, %xmm10
+	vpaddd		%xmm9, %xmm10, %xmm9
+	vpaddd		%xmm0, %xmm9, %xmm9
+	vpxor		%xmm9, %xmm1, %xmm1
+	vpshufb		%xmm6, %xmm1, %xmm1
+	vpaddd		%xmm1, %xmm11, %xmm13
+	vpxor		%xmm13, %xmm0, %xmm0
+	vpshufd		$147, %xmm1, %xmm1
+	vpshufd		$78, %xmm13, %xmm13
+	vpslld		$25, %xmm0, %xmm10
+	vpsrld		$7, %xmm0, %xmm0
+	vpxor		%xmm10, %xmm0, %xmm0
+	vpblendw	$15, %xmm3, %xmm4, %xmm10
+	vpblendw	$192, %xmm5, %xmm10, %xmm10
+	vpshufd		$57, %xmm0, %xmm0
+	vpshufd		$198, %xmm10, %xmm10
+	vpaddd		%xmm9, %xmm10, %xmm10
+	vpaddd		%xmm0, %xmm10, %xmm10
+	vpxor		%xmm10, %xmm1, %xmm1
+	vpshufb		%xmm7, %xmm1, %xmm1
+	vpaddd		%xmm1, %xmm13, %xmm13
+	vpxor		%xmm13, %xmm0, %xmm9
+	vpslld		$20, %xmm9, %xmm0
+	vpsrld		$12, %xmm9, %xmm9
+	vpxor		%xmm0, %xmm9, %xmm0
+	vpunpckhdq	%xmm2, %xmm3, %xmm9
+	vpunpcklqdq	%xmm12, %xmm9, %xmm15
+	vpunpcklqdq	%xmm12, %xmm8, %xmm12
+	vpblendw	$15, %xmm5, %xmm8, %xmm8
+	vpaddd		%xmm15, %xmm10, %xmm15
+	vpaddd		%xmm0, %xmm15, %xmm15
+	vpxor		%xmm15, %xmm1, %xmm1
+	vpshufd		$141, %xmm8, %xmm8
+	vpshufb		%xmm6, %xmm1, %xmm1
+	vpaddd		%xmm1, %xmm13, %xmm13
+	vpxor		%xmm13, %xmm0, %xmm0
+	vpshufd		$57, %xmm1, %xmm1
+	vpshufd		$78, %xmm13, %xmm13
+	vpslld		$25, %xmm0, %xmm10
+	vpsrld		$7, %xmm0, %xmm0
+	vpxor		%xmm10, %xmm0, %xmm0
+	vpunpcklqdq	%xmm2, %xmm3, %xmm10
+	vpshufd		$147, %xmm0, %xmm0
+	vpblendw	$51, %xmm14, %xmm10, %xmm14
+	vpshufd		$135, %xmm14, %xmm14
+	vpaddd		%xmm15, %xmm14, %xmm14
+	vpaddd		%xmm0, %xmm14, %xmm14
+	vpxor		%xmm14, %xmm1, %xmm1
+	vpunpcklqdq	%xmm3, %xmm4, %xmm15
+	vpshufb		%xmm7, %xmm1, %xmm1
+	vpaddd		%xmm1, %xmm13, %xmm13
+	vpxor		%xmm13, %xmm0, %xmm0
+	vpslld		$20, %xmm0, %xmm11
+	vpsrld		$12, %xmm0, %xmm0
+	vpxor		%xmm11, %xmm0, %xmm0
+	vpunpckhqdq	%xmm5, %xmm3, %xmm11
+	vpblendw	$51, %xmm15, %xmm11, %xmm11
+	vpunpckhqdq	%xmm3, %xmm5, %xmm15
+	vpaddd		%xmm11, %xmm14, %xmm11
+	vpaddd		%xmm0, %xmm11, %xmm11
+	vpxor		%xmm11, %xmm1, %xmm1
+	vpshufb		%xmm6, %xmm1, %xmm1
+	vpaddd		%xmm1, %xmm13, %xmm13
+	vpxor		%xmm13, %xmm0, %xmm0
+	vpshufd		$147, %xmm1, %xmm1
+	vpshufd		$78, %xmm13, %xmm13
+	vpslld		$25, %xmm0, %xmm14
+	vpsrld		$7, %xmm0, %xmm0
+	vpxor		%xmm14, %xmm0, %xmm14
+	vpunpckhqdq	%xmm4, %xmm2, %xmm0
+	vpshufd		$57, %xmm14, %xmm14
+	vpblendw	$51, %xmm15, %xmm0, %xmm15
+	vpaddd		%xmm15, %xmm11, %xmm15
+	vpaddd		%xmm14, %xmm15, %xmm15
+	vpxor		%xmm15, %xmm1, %xmm1
+	vpshufb		%xmm7, %xmm1, %xmm1
+	vpaddd		%xmm1, %xmm13, %xmm13
+	vpxor		%xmm13, %xmm14, %xmm14
+	vpslld		$20, %xmm14, %xmm11
+	vpsrld		$12, %xmm14, %xmm14
+	vpxor		%xmm11, %xmm14, %xmm14
+	vpblendw	$3, %xmm2, %xmm4, %xmm11
+	vpslldq		$8, %xmm11, %xmm0
+	vpblendw	$15, %xmm5, %xmm0, %xmm0
+	vpshufd		$99, %xmm0, %xmm0
+	vpaddd		%xmm15, %xmm0, %xmm15
+	vpaddd		%xmm14, %xmm15, %xmm15
+	vpxor		%xmm15, %xmm1, %xmm0
+	vpaddd		%xmm12, %xmm15, %xmm15
+	vpshufb		%xmm6, %xmm0, %xmm0
+	vpaddd		%xmm0, %xmm13, %xmm13
+	vpxor		%xmm13, %xmm14, %xmm14
+	vpshufd		$57, %xmm0, %xmm0
+	vpshufd		$78, %xmm13, %xmm13
+	vpslld		$25, %xmm14, %xmm1
+	vpsrld		$7, %xmm14, %xmm14
+	vpxor		%xmm1, %xmm14, %xmm14
+	vpblendw	$3, %xmm5, %xmm4, %xmm1
+	vpshufd		$147, %xmm14, %xmm14
+	vpaddd		%xmm14, %xmm15, %xmm15
+	vpxor		%xmm15, %xmm0, %xmm0
+	vpshufb		%xmm7, %xmm0, %xmm0
+	vpaddd		%xmm0, %xmm13, %xmm13
+	vpxor		%xmm13, %xmm14, %xmm14
+	vpslld		$20, %xmm14, %xmm12
+	vpsrld		$12, %xmm14, %xmm14
+	vpxor		%xmm12, %xmm14, %xmm14
+	vpsrldq		$4, %xmm2, %xmm12
+	vpblendw	$60, %xmm12, %xmm1, %xmm1
+	vpaddd		%xmm1, %xmm15, %xmm15
+	vpaddd		%xmm14, %xmm15, %xmm15
+	vpxor		%xmm15, %xmm0, %xmm0
+	vpblendw	$12, %xmm4, %xmm3, %xmm1
+	vpshufb		%xmm6, %xmm0, %xmm0
+	vpaddd		%xmm0, %xmm13, %xmm13
+	vpxor		%xmm13, %xmm14, %xmm14
+	vpshufd		$147, %xmm0, %xmm0
+	vpshufd		$78, %xmm13, %xmm13
+	vpslld		$25, %xmm14, %xmm12
+	vpsrld		$7, %xmm14, %xmm14
+	vpxor		%xmm12, %xmm14, %xmm14
+	vpsrldq		$4, %xmm5, %xmm12
+	vpblendw	$48, %xmm12, %xmm1, %xmm1
+	vpshufd		$33, %xmm5, %xmm12
+	vpshufd		$57, %xmm14, %xmm14
+	vpshufd		$108, %xmm1, %xmm1
+	vpblendw	$51, %xmm12, %xmm10, %xmm12
+	vpaddd		%xmm15, %xmm1, %xmm15
+	vpaddd		%xmm14, %xmm15, %xmm15
+	vpxor		%xmm15, %xmm0, %xmm0
+	vpaddd		%xmm12, %xmm15, %xmm15
+	vpshufb		%xmm7, %xmm0, %xmm0
+	vpaddd		%xmm0, %xmm13, %xmm1
+	vpxor		%xmm1, %xmm14, %xmm14
+	vpslld		$20, %xmm14, %xmm13
+	vpsrld		$12, %xmm14, %xmm14
+	vpxor		%xmm13, %xmm14, %xmm14
+	vpslldq		$12, %xmm3, %xmm13
+	vpaddd		%xmm14, %xmm15, %xmm15
+	vpxor		%xmm15, %xmm0, %xmm0
+	vpshufb		%xmm6, %xmm0, %xmm0
+	vpaddd		%xmm0, %xmm1, %xmm1
+	vpxor		%xmm1, %xmm14, %xmm14
+	vpshufd		$57, %xmm0, %xmm0
+	vpshufd		$78, %xmm1, %xmm1
+	vpslld		$25, %xmm14, %xmm12
+	vpsrld		$7, %xmm14, %xmm14
+	vpxor		%xmm12, %xmm14, %xmm14
+	vpblendw	$51, %xmm5, %xmm4, %xmm12
+	vpshufd		$147, %xmm14, %xmm14
+	vpblendw	$192, %xmm13, %xmm12, %xmm12
+	vpaddd		%xmm12, %xmm15, %xmm15
+	vpaddd		%xmm14, %xmm15, %xmm15
+	vpxor		%xmm15, %xmm0, %xmm0
+	vpsrldq		$4, %xmm3, %xmm12
+	vpshufb		%xmm7, %xmm0, %xmm0
+	vpaddd		%xmm0, %xmm1, %xmm1
+	vpxor		%xmm1, %xmm14, %xmm14
+	vpslld		$20, %xmm14, %xmm13
+	vpsrld		$12, %xmm14, %xmm14
+	vpxor		%xmm13, %xmm14, %xmm14
+	vpblendw	$48, %xmm2, %xmm5, %xmm13
+	vpblendw	$3, %xmm12, %xmm13, %xmm13
+	vpshufd		$156, %xmm13, %xmm13
+	vpaddd		%xmm15, %xmm13, %xmm15
+	vpaddd		%xmm14, %xmm15, %xmm15
+	vpxor		%xmm15, %xmm0, %xmm0
+	vpshufb		%xmm6, %xmm0, %xmm0
+	vpaddd		%xmm0, %xmm1, %xmm1
+	vpxor		%xmm1, %xmm14, %xmm14
+	vpshufd		$147, %xmm0, %xmm0
+	vpshufd		$78, %xmm1, %xmm1
+	vpslld		$25, %xmm14, %xmm13
+	vpsrld		$7, %xmm14, %xmm14
+	vpxor		%xmm13, %xmm14, %xmm14
+	vpunpcklqdq	%xmm2, %xmm4, %xmm13
+	vpshufd		$57, %xmm14, %xmm14
+	vpblendw	$12, %xmm12, %xmm13, %xmm12
+	vpshufd		$180, %xmm12, %xmm12
+	vpaddd		%xmm15, %xmm12, %xmm15
+	vpaddd		%xmm14, %xmm15, %xmm15
+	vpxor		%xmm15, %xmm0, %xmm0
+	vpshufb		%xmm7, %xmm0, %xmm0
+	vpaddd		%xmm0, %xmm1, %xmm1
+	vpxor		%xmm1, %xmm14, %xmm14
+	vpslld		$20, %xmm14, %xmm12
+	vpsrld		$12, %xmm14, %xmm14
+	vpxor		%xmm12, %xmm14, %xmm14
+	vpunpckhqdq	%xmm9, %xmm4, %xmm12
+	vpshufd		$198, %xmm12, %xmm12
+	vpaddd		%xmm15, %xmm12, %xmm15
+	vpaddd		%xmm14, %xmm15, %xmm15
+	vpxor		%xmm15, %xmm0, %xmm0
+	vpaddd		%xmm15, %xmm8, %xmm15
+	vpshufb		%xmm6, %xmm0, %xmm0
+	vpaddd		%xmm0, %xmm1, %xmm1
+	vpxor		%xmm1, %xmm14, %xmm14
+	vpshufd		$57, %xmm0, %xmm0
+	vpshufd		$78, %xmm1, %xmm1
+	vpslld		$25, %xmm14, %xmm12
+	vpsrld		$7, %xmm14, %xmm14
+	vpxor		%xmm12, %xmm14, %xmm14
+	vpsrldq		$4, %xmm4, %xmm12
+	vpshufd		$147, %xmm14, %xmm14
+	vpaddd		%xmm14, %xmm15, %xmm15
+	vpxor		%xmm15, %xmm0, %xmm0
+	vpshufb		%xmm7, %xmm0, %xmm0
+	vpaddd		%xmm0, %xmm1, %xmm1
+	vpxor		%xmm1, %xmm14, %xmm14
+	vpslld		$20, %xmm14, %xmm8
+	vpsrld		$12, %xmm14, %xmm14
+	vpxor		%xmm14, %xmm8, %xmm14
+	vpblendw	$48, %xmm5, %xmm2, %xmm8
+	vpblendw	$3, %xmm12, %xmm8, %xmm8
+	vpunpckhqdq	%xmm5, %xmm4, %xmm12
+	vpshufd		$75, %xmm8, %xmm8
+	vpblendw	$60, %xmm10, %xmm12, %xmm10
+	vpaddd		%xmm15, %xmm8, %xmm15
+	vpaddd		%xmm14, %xmm15, %xmm15
+	vpxor		%xmm0, %xmm15, %xmm0
+	vpshufd		$45, %xmm10, %xmm10
+	vpshufb		%xmm6, %xmm0, %xmm0
+	vpaddd		%xmm15, %xmm10, %xmm15
+	vpaddd		%xmm0, %xmm1, %xmm1
+	vpxor		%xmm1, %xmm14, %xmm14
+	vpshufd		$147, %xmm0, %xmm0
+	vpshufd		$78, %xmm1, %xmm1
+	vpslld		$25, %xmm14, %xmm8
+	vpsrld		$7, %xmm14, %xmm14
+	vpxor		%xmm14, %xmm8, %xmm8
+	vpshufd		$57, %xmm8, %xmm8
+	vpaddd		%xmm8, %xmm15, %xmm15
+	vpxor		%xmm0, %xmm15, %xmm0
+	vpshufb		%xmm7, %xmm0, %xmm0
+	vpaddd		%xmm0, %xmm1, %xmm1
+	vpxor		%xmm8, %xmm1, %xmm8
+	vpslld		$20, %xmm8, %xmm10
+	vpsrld		$12, %xmm8, %xmm8
+	vpxor		%xmm8, %xmm10, %xmm10
+	vpunpckldq	%xmm3, %xmm4, %xmm8
+	vpunpcklqdq	%xmm9, %xmm8, %xmm9
+	vpaddd		%xmm9, %xmm15, %xmm9
+	vpaddd		%xmm10, %xmm9, %xmm9
+	vpxor		%xmm0, %xmm9, %xmm8
+	vpshufb		%xmm6, %xmm8, %xmm8
+	vpaddd		%xmm8, %xmm1, %xmm1
+	vpxor		%xmm1, %xmm10, %xmm10
+	vpshufd		$57, %xmm8, %xmm8
+	vpshufd		$78, %xmm1, %xmm1
+	vpslld		$25, %xmm10, %xmm12
+	vpsrld		$7, %xmm10, %xmm10
+	vpxor		%xmm10, %xmm12, %xmm10
+	vpblendw	$48, %xmm4, %xmm3, %xmm12
+	vpshufd		$147, %xmm10, %xmm0
+	vpunpckhdq	%xmm5, %xmm3, %xmm10
+	vpshufd		$78, %xmm12, %xmm12
+	vpunpcklqdq	%xmm4, %xmm10, %xmm10
+	vpblendw	$192, %xmm2, %xmm10, %xmm10
+	vpshufhw	$78, %xmm10, %xmm10
+	vpaddd		%xmm10, %xmm9, %xmm10
+	vpaddd		%xmm0, %xmm10, %xmm10
+	vpxor		%xmm8, %xmm10, %xmm8
+	vpshufb		%xmm7, %xmm8, %xmm8
+	vpaddd		%xmm8, %xmm1, %xmm1
+	vpxor		%xmm0, %xmm1, %xmm9
+	vpslld		$20, %xmm9, %xmm0
+	vpsrld		$12, %xmm9, %xmm9
+	vpxor		%xmm9, %xmm0, %xmm0
+	vpunpckhdq	%xmm5, %xmm4, %xmm9
+	vpblendw	$240, %xmm9, %xmm2, %xmm13
+	vpshufd		$39, %xmm13, %xmm13
+	vpaddd		%xmm10, %xmm13, %xmm10
+	vpaddd		%xmm0, %xmm10, %xmm10
+	vpxor		%xmm8, %xmm10, %xmm8
+	vpblendw	$12, %xmm4, %xmm2, %xmm13
+	vpshufb		%xmm6, %xmm8, %xmm8
+	vpslldq		$4, %xmm13, %xmm13
+	vpblendw	$15, %xmm5, %xmm13, %xmm13
+	vpaddd		%xmm8, %xmm1, %xmm1
+	vpxor		%xmm1, %xmm0, %xmm0
+	vpaddd		%xmm13, %xmm10, %xmm13
+	vpshufd		$147, %xmm8, %xmm8
+	vpshufd		$78, %xmm1, %xmm1
+	vpslld		$25, %xmm0, %xmm14
+	vpsrld		$7, %xmm0, %xmm0
+	vpxor		%xmm0, %xmm14, %xmm14
+	vpshufd		$57, %xmm14, %xmm14
+	vpaddd		%xmm14, %xmm13, %xmm13
+	vpxor		%xmm8, %xmm13, %xmm8
+	vpaddd		%xmm13, %xmm12, %xmm12
+	vpshufb		%xmm7, %xmm8, %xmm8
+	vpaddd		%xmm8, %xmm1, %xmm1
+	vpxor		%xmm14, %xmm1, %xmm14
+	vpslld		$20, %xmm14, %xmm10
+	vpsrld		$12, %xmm14, %xmm14
+	vpxor		%xmm14, %xmm10, %xmm10
+	vpaddd		%xmm10, %xmm12, %xmm12
+	vpxor		%xmm8, %xmm12, %xmm8
+	vpshufb		%xmm6, %xmm8, %xmm8
+	vpaddd		%xmm8, %xmm1, %xmm1
+	vpxor		%xmm1, %xmm10, %xmm0
+	vpshufd		$57, %xmm8, %xmm8
+	vpshufd		$78, %xmm1, %xmm1
+	vpslld		$25, %xmm0, %xmm10
+	vpsrld		$7, %xmm0, %xmm0
+	vpxor		%xmm0, %xmm10, %xmm10
+	vpblendw	$48, %xmm2, %xmm3, %xmm0
+	vpblendw	$15, %xmm11, %xmm0, %xmm0
+	vpshufd		$147, %xmm10, %xmm10
+	vpshufd		$114, %xmm0, %xmm0
+	vpaddd		%xmm12, %xmm0, %xmm0
+	vpaddd		%xmm10, %xmm0, %xmm0
+	vpxor		%xmm8, %xmm0, %xmm8
+	vpshufb		%xmm7, %xmm8, %xmm8
+	vpaddd		%xmm8, %xmm1, %xmm1
+	vpxor		%xmm10, %xmm1, %xmm10
+	vpslld		$20, %xmm10, %xmm11
+	vpsrld		$12, %xmm10, %xmm10
+	vpxor		%xmm10, %xmm11, %xmm10
+	vpslldq		$4, %xmm4, %xmm11
+	vpblendw	$192, %xmm11, %xmm3, %xmm3
+	vpunpckldq	%xmm5, %xmm4, %xmm4
+	vpshufd		$99, %xmm3, %xmm3
+	vpaddd		%xmm0, %xmm3, %xmm3
+	vpaddd		%xmm10, %xmm3, %xmm3
+	vpxor		%xmm8, %xmm3, %xmm11
+	vpunpckldq	%xmm5, %xmm2, %xmm0
+	vpblendw	$192, %xmm2, %xmm5, %xmm2
+	vpshufb		%xmm6, %xmm11, %xmm11
+	vpunpckhqdq	%xmm0, %xmm9, %xmm0
+	vpblendw	$15, %xmm4, %xmm2, %xmm4
+	vpaddd		%xmm11, %xmm1, %xmm1
+	vpxor		%xmm1, %xmm10, %xmm10
+	vpshufd		$147, %xmm11, %xmm11
+	vpshufd		$201, %xmm0, %xmm0
+	vpslld		$25, %xmm10, %xmm8
+	vpsrld		$7, %xmm10, %xmm10
+	vpxor		%xmm10, %xmm8, %xmm10
+	vpshufd		$78, %xmm1, %xmm1
+	vpaddd		%xmm3, %xmm0, %xmm0
+	vpshufd		$27, %xmm4, %xmm4
+	vpshufd		$57, %xmm10, %xmm10
+	vpaddd		%xmm10, %xmm0, %xmm0
+	vpxor		%xmm11, %xmm0, %xmm11
+	vpaddd		%xmm0, %xmm4, %xmm0
+	vpshufb		%xmm7, %xmm11, %xmm7
+	vpaddd		%xmm7, %xmm1, %xmm1
+	vpxor		%xmm10, %xmm1, %xmm10
+	vpslld		$20, %xmm10, %xmm8
+	vpsrld		$12, %xmm10, %xmm10
+	vpxor		%xmm10, %xmm8, %xmm8
+	vpaddd		%xmm8, %xmm0, %xmm0
+	vpxor		%xmm7, %xmm0, %xmm7
+	vpshufb		%xmm6, %xmm7, %xmm6
+	vpaddd		%xmm6, %xmm1, %xmm1
+	vpxor		%xmm1, %xmm8, %xmm8
+	vpshufd		$78, %xmm1, %xmm1
+	vpshufd		$57, %xmm6, %xmm6
+	vpslld		$25, %xmm8, %xmm2
+	vpsrld		$7, %xmm8, %xmm8
+	vpxor		%xmm8, %xmm2, %xmm8
+	vpxor		(%rdi), %xmm1, %xmm1
+	vpshufd		$147, %xmm8, %xmm8
+	vpxor		%xmm0, %xmm1, %xmm0
+	vmovups		%xmm0, (%rdi)
+	vpxor		16(%rdi), %xmm8, %xmm0
+	vpxor		%xmm6, %xmm0, %xmm6
+	vmovups		%xmm6, 16(%rdi)
+	addq		$64, %rsi
+	decq		%rdx
+	jnz .Lbeginofloop
+.Lendofloop:
+	ret
+ENDPROC(blake2s_compress_avx)
+#endif /* CONFIG_AS_AVX */
+
+#ifdef CONFIG_AS_AVX512
+ENTRY(blake2s_compress_avx512)
+	vmovdqu (%rdi),%xmm0
+	vmovdqu		0x10(%rdi),%xmm1
+	vmovdqu		0x20(%rdi),%xmm4
+	vmovq		%rcx,%xmm5
+	vmovdqa		IV(%rip),%xmm14
+	vmovdqa		IV+16(%rip),%xmm15
+	jmp		.Lblake2s_compress_avx512_mainloop
+.align 32
+.Lblake2s_compress_avx512_mainloop:
+	vmovdqa		%xmm0,%xmm10
+	vmovdqa		%xmm1,%xmm11
+	vpaddq		%xmm5,%xmm4,%xmm4
+	vmovdqa		%xmm14,%xmm2
+	vpxor		%xmm15,%xmm4,%xmm3
+	vmovdqu		(%rsi),%ymm6
+	vmovdqu		0x20(%rsi),%ymm7
+	addq		$0x40,%rsi
+	leaq		SIGMA(%rip),%rax
+	movb		$0xa,%cl
+.Lblake2s_compress_avx512_roundloop:
+	addq		$0x40,%rax
+	vmovdqa		-0x40(%rax),%ymm8
+	vmovdqa		-0x20(%rax),%ymm9
+	vpermi2d	%ymm7,%ymm6,%ymm8
+	vpermi2d	%ymm7,%ymm6,%ymm9
+	vmovdqa		%ymm8,%ymm6
+	vmovdqa		%ymm9,%ymm7
+	vpaddd		%xmm8,%xmm0,%xmm0
+	vpaddd		%xmm1,%xmm0,%xmm0
+	vpxor		%xmm0,%xmm3,%xmm3
+	vprord		$0x10,%xmm3,%xmm3
+	vpaddd		%xmm3,%xmm2,%xmm2
+	vpxor		%xmm2,%xmm1,%xmm1
+	vprord		$0xc,%xmm1,%xmm1
+	vextracti128	$0x1,%ymm8,%xmm8
+	vpaddd		%xmm8,%xmm0,%xmm0
+	vpaddd		%xmm1,%xmm0,%xmm0
+	vpxor		%xmm0,%xmm3,%xmm3
+	vprord		$0x8,%xmm3,%xmm3
+	vpaddd		%xmm3,%xmm2,%xmm2
+	vpxor		%xmm2,%xmm1,%xmm1
+	vprord		$0x7,%xmm1,%xmm1
+	vpshufd		$0x39,%xmm1,%xmm1
+	vpshufd		$0x4e,%xmm2,%xmm2
+	vpshufd		$0x93,%xmm3,%xmm3
+	vpaddd		%xmm9,%xmm0,%xmm0
+	vpaddd		%xmm1,%xmm0,%xmm0
+	vpxor		%xmm0,%xmm3,%xmm3
+	vprord		$0x10,%xmm3,%xmm3
+	vpaddd		%xmm3,%xmm2,%xmm2
+	vpxor		%xmm2,%xmm1,%xmm1
+	vprord		$0xc,%xmm1,%xmm1
+	vextracti128	$0x1,%ymm9,%xmm9
+	vpaddd		%xmm9,%xmm0,%xmm0
+	vpaddd		%xmm1,%xmm0,%xmm0
+	vpxor		%xmm0,%xmm3,%xmm3
+	vprord		$0x8,%xmm3,%xmm3
+	vpaddd		%xmm3,%xmm2,%xmm2
+	vpxor		%xmm2,%xmm1,%xmm1
+	vprord		$0x7,%xmm1,%xmm1
+	vpshufd		$0x93,%xmm1,%xmm1
+	vpshufd		$0x4e,%xmm2,%xmm2
+	vpshufd		$0x39,%xmm3,%xmm3
+	decb		%cl
+	jne		.Lblake2s_compress_avx512_roundloop
+	vpxor		%xmm10,%xmm0,%xmm0
+	vpxor		%xmm11,%xmm1,%xmm1
+	vpxor		%xmm2,%xmm0,%xmm0
+	vpxor		%xmm3,%xmm1,%xmm1
+	decq		%rdx
+	jne		.Lblake2s_compress_avx512_mainloop
+	vmovdqu		%xmm0,(%rdi)
+	vmovdqu		%xmm1,0x10(%rdi)
+	vmovdqu		%xmm4,0x20(%rdi)
+	vzeroupper
+	retq
+ENDPROC(blake2s_compress_avx512)
+#endif /* CONFIG_AS_AVX512 */
diff --git b/net/wireguard/crypto/blake2s.c b/net/wireguard/crypto/blake2s.c
new file mode 100644
index 0000000..1f4052d
--- /dev/null
+++ b/net/wireguard/crypto/blake2s.c
@@ -0,0 +1,294 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ *
+ * Original author: Samuel Neves <sneves@dei.uc.pt>
+ */
+
+#include "blake2s.h"
+
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/bug.h>
+#include <asm/unaligned.h>
+
+typedef struct {
+	u8 digest_length;
+	u8 key_length;
+	u8 fanout;
+	u8 depth;
+	u32 leaf_length;
+	u32 node_offset;
+	u16 xof_length;
+	u8 node_depth;
+	u8 inner_length;
+	u8 salt[8];
+	u8 personal[8];
+} __packed blake2s_param;
+
+static const u32 blake2s_iv[8] = {
+	0x6A09E667UL, 0xBB67AE85UL, 0x3C6EF372UL, 0xA54FF53AUL,
+	0x510E527FUL, 0x9B05688CUL, 0x1F83D9ABUL, 0x5BE0CD19UL
+};
+
+static const u8 blake2s_sigma[10][16] = {
+	{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
+	{14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3},
+	{11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4},
+	{7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8},
+	{9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13},
+	{2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9},
+	{12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11},
+	{13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10},
+	{6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5},
+	{10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0},
+};
+
+static inline u32 le32_to_cpuvp(const void *p)
+{
+	return le32_to_cpup(p);
+}
+
+static inline void blake2s_set_lastblock(struct blake2s_state *state)
+{
+	if (state->last_node)
+		state->f[1] = -1;
+	state->f[0] = -1;
+}
+
+static inline void blake2s_increment_counter(struct blake2s_state *state, const u32 inc)
+{
+	state->t[0] += inc;
+	state->t[1] += (state->t[0] < inc);
+}
+
+static inline void blake2s_init_param(struct blake2s_state *state, const blake2s_param *param)
+{
+	const __le32 *p;
+	int i;
+
+	memset(state, 0, sizeof(struct blake2s_state));
+	for (i = 0; i < 8; ++i)
+		state->h[i] = blake2s_iv[i];
+	p = (const __le32 *)param;
+	/* IV XOR ParamBlock */
+	for (i = 0; i < 8; ++i)
+		state->h[i] ^= le32_to_cpu(p[i]);
+}
+
+void blake2s_init(struct blake2s_state *state, const size_t outlen)
+{
+	blake2s_param param __aligned(__alignof__(u32)) = {
+		.digest_length = outlen,
+		.fanout = 1,
+		.depth = 1
+	};
+
+#ifdef DEBUG
+	BUG_ON(!outlen || outlen > BLAKE2S_OUTBYTES);
+#endif
+	blake2s_init_param(state, &param);
+}
+
+void blake2s_init_key(struct blake2s_state *state, const size_t outlen, const void *key, const size_t keylen)
+{
+	blake2s_param param = {
+		.digest_length = outlen,
+		.key_length = keylen,
+		.fanout = 1,
+		.depth = 1
+	};
+	u8 block[BLAKE2S_BLOCKBYTES] = { 0 };
+
+#ifdef DEBUG
+	BUG_ON(!outlen || outlen > BLAKE2S_OUTBYTES || !key || !keylen || keylen > BLAKE2S_KEYBYTES);
+#endif
+	blake2s_init_param(state, &param);
+	memcpy(block, key, keylen);
+	blake2s_update(state, block, BLAKE2S_BLOCKBYTES);
+	memzero_explicit(block, BLAKE2S_BLOCKBYTES);
+}
+
+#ifdef CONFIG_X86_64
+#include <asm/cpufeature.h>
+#include <asm/processor.h>
+#include <asm/fpu/api.h>
+#include <asm/simd.h>
+static bool blake2s_use_avx __read_mostly;
+static bool blake2s_use_avx512 __read_mostly;
+void __init blake2s_fpu_init(void)
+{
+	blake2s_use_avx = boot_cpu_has(X86_FEATURE_AVX) && cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
+#ifndef COMPAT_CANNOT_USE_AVX512
+	blake2s_use_avx512 = boot_cpu_has(X86_FEATURE_AVX) && boot_cpu_has(X86_FEATURE_AVX2) && boot_cpu_has(X86_FEATURE_AVX512F) && boot_cpu_has(X86_FEATURE_AVX512VL) && cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM | XFEATURE_MASK_ZMM_Hi256, NULL);
+#endif
+}
+#ifdef CONFIG_AS_AVX
+asmlinkage void blake2s_compress_avx(struct blake2s_state *state, const u8 *block, size_t nblocks, u32 inc);
+#endif
+#ifdef CONFIG_AS_AVX512
+asmlinkage void blake2s_compress_avx512(struct blake2s_state *state, const u8 *block, size_t nblocks, u32 inc);
+#endif
+#else
+void __init blake2s_fpu_init(void) { }
+#endif
+
+static inline void blake2s_compress(struct blake2s_state *state, const u8 *block, size_t nblocks, u32 inc)
+{
+	u32 m[16];
+	u32 v[16];
+	int i;
+
+#ifdef DEBUG
+	BUG_ON(nblocks > 1 && inc != BLAKE2S_BLOCKBYTES);
+#endif
+
+#ifdef CONFIG_X86_64
+#ifdef CONFIG_AS_AVX512
+	if (blake2s_use_avx512 && irq_fpu_usable()) {
+		kernel_fpu_begin();
+		blake2s_compress_avx512(state, block, nblocks, inc);
+		kernel_fpu_end();
+		return;
+	}
+#endif
+#ifdef CONFIG_AS_AVX
+	if (blake2s_use_avx && irq_fpu_usable()) {
+		kernel_fpu_begin();
+		blake2s_compress_avx(state, block, nblocks, inc);
+		kernel_fpu_end();
+		return;
+	}
+#endif
+#endif
+
+	while (nblocks > 0) {
+		blake2s_increment_counter(state, inc);
+
+#ifdef __LITTLE_ENDIAN
+		memcpy(m, block, BLAKE2S_BLOCKBYTES);
+#else
+		for (i = 0; i < 16; ++i)
+			m[i] = get_unaligned_le32(block + i * sizeof(m[i]));
+#endif
+		memcpy(v, state->h, 32);
+		v[ 8] = blake2s_iv[0];
+		v[ 9] = blake2s_iv[1];
+		v[10] = blake2s_iv[2];
+		v[11] = blake2s_iv[3];
+		v[12] = blake2s_iv[4] ^ state->t[0];
+		v[13] = blake2s_iv[5] ^ state->t[1];
+		v[14] = blake2s_iv[6] ^ state->f[0];
+		v[15] = blake2s_iv[7] ^ state->f[1];
+
+#define G(r, i, a, b, c, d) do { \
+	a += b + m[blake2s_sigma[r][2 * i + 0]]; \
+	d = ror32(d ^ a, 16); \
+	c += d; \
+	b = ror32(b ^ c, 12); \
+	a += b + m[blake2s_sigma[r][2 * i + 1]]; \
+	d = ror32(d ^ a, 8); \
+	c += d; \
+	b = ror32(b ^ c, 7); \
+} while (0)
+
+#define ROUND(r) do { \
+	G(r, 0, v[0], v[ 4], v[ 8], v[12]); \
+	G(r, 1, v[1], v[ 5], v[ 9], v[13]); \
+	G(r, 2, v[2], v[ 6], v[10], v[14]); \
+	G(r, 3, v[3], v[ 7], v[11], v[15]); \
+	G(r, 4, v[0], v[ 5], v[10], v[15]); \
+	G(r, 5, v[1], v[ 6], v[11], v[12]); \
+	G(r, 6, v[2], v[ 7], v[ 8], v[13]); \
+	G(r, 7, v[3], v[ 4], v[ 9], v[14]); \
+} while (0)
+		ROUND(0);
+		ROUND(1);
+		ROUND(2);
+		ROUND(3);
+		ROUND(4);
+		ROUND(5);
+		ROUND(6);
+		ROUND(7);
+		ROUND(8);
+		ROUND(9);
+
+#undef G
+#undef ROUND
+
+		for (i = 0; i < 8; ++i)
+			state->h[i] ^= v[i] ^ v[i + 8];
+
+		block += BLAKE2S_BLOCKBYTES;
+		--nblocks;
+	}
+}
+
+void blake2s_update(struct blake2s_state *state, const u8 *in, size_t inlen)
+{
+	const size_t fill = BLAKE2S_BLOCKBYTES - state->buflen;
+
+	if (unlikely(!inlen))
+		return;
+	if (inlen > fill) {
+		memcpy(state->buf + state->buflen, in, fill);
+		blake2s_compress(state, state->buf, 1, BLAKE2S_BLOCKBYTES);
+		state->buflen = 0;
+		in += fill;
+		inlen -= fill;
+	}
+	if (inlen > BLAKE2S_BLOCKBYTES) {
+		const size_t nblocks = (inlen + BLAKE2S_BLOCKBYTES - 1) / BLAKE2S_BLOCKBYTES;
+		/* Hash one less (full) block than strictly possible */
+		blake2s_compress(state, in, nblocks - 1, BLAKE2S_BLOCKBYTES);
+		in += BLAKE2S_BLOCKBYTES * (nblocks - 1);
+		inlen -= BLAKE2S_BLOCKBYTES * (nblocks - 1);
+	}
+	memcpy(state->buf + state->buflen, in, inlen);
+	state->buflen += inlen;
+}
+
+void __blake2s_final(struct blake2s_state *state)
+{
+	blake2s_set_lastblock(state);
+	memset(state->buf + state->buflen, 0, BLAKE2S_BLOCKBYTES - state->buflen); /* Padding */
+	blake2s_compress(state, state->buf, 1, state->buflen);
+}
+
+void blake2s_hmac(u8 *out, const u8 *in, const u8 *key, const size_t outlen, const size_t inlen, const size_t keylen)
+{
+	struct blake2s_state state;
+	u8 x_key[BLAKE2S_BLOCKBYTES] __aligned(__alignof__(u32)) = { 0 };
+	u8 i_hash[BLAKE2S_OUTBYTES] __aligned(__alignof__(u32));
+	int i;
+
+	if (keylen > BLAKE2S_BLOCKBYTES) {
+		blake2s_init(&state, BLAKE2S_OUTBYTES);
+		blake2s_update(&state, key, keylen);
+		blake2s_final(&state, x_key, BLAKE2S_OUTBYTES);
+	} else
+		memcpy(x_key, key, keylen);
+
+	for (i = 0; i < BLAKE2S_BLOCKBYTES; ++i)
+		x_key[i] ^= 0x36;
+
+	blake2s_init(&state, BLAKE2S_OUTBYTES);
+	blake2s_update(&state, x_key, BLAKE2S_BLOCKBYTES);
+	blake2s_update(&state, in, inlen);
+	blake2s_final(&state, i_hash, BLAKE2S_OUTBYTES);
+
+	for (i = 0; i < BLAKE2S_BLOCKBYTES; ++i)
+		x_key[i] ^= 0x5c ^ 0x36;
+
+	blake2s_init(&state, BLAKE2S_OUTBYTES);
+	blake2s_update(&state, x_key, BLAKE2S_BLOCKBYTES);
+	blake2s_update(&state, i_hash, BLAKE2S_OUTBYTES);
+	blake2s_final(&state, i_hash, BLAKE2S_OUTBYTES);
+
+	memcpy(out, i_hash, outlen);
+	memzero_explicit(x_key, BLAKE2S_BLOCKBYTES);
+	memzero_explicit(i_hash, BLAKE2S_OUTBYTES);
+}
+
+#include "../selftest/blake2s.h"
diff --git b/net/wireguard/crypto/blake2s.h b/net/wireguard/crypto/blake2s.h
new file mode 100644
index 0000000..ac60cfe
--- /dev/null
+++ b/net/wireguard/crypto/blake2s.h
@@ -0,0 +1,94 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#ifndef _WG_BLAKE2S_H
+#define _WG_BLAKE2S_H
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <crypto/algapi.h>
+
+enum blake2s_lengths {
+	BLAKE2S_BLOCKBYTES = 64,
+	BLAKE2S_OUTBYTES = 32,
+	BLAKE2S_KEYBYTES = 32
+};
+
+struct blake2s_state {
+	u32 h[8];
+	u32 t[2];
+	u32 f[2];
+	u8 buf[BLAKE2S_BLOCKBYTES];
+	size_t buflen;
+	u8 last_node;
+};
+
+void blake2s_init(struct blake2s_state *state, const size_t outlen);
+void blake2s_init_key(struct blake2s_state *state, const size_t outlen, const void *key, const size_t keylen);
+void blake2s_update(struct blake2s_state *state, const u8 *in, size_t inlen);
+void __blake2s_final(struct blake2s_state *state);
+static inline void blake2s_final(struct blake2s_state *state, u8 *out, size_t outlen)
+{
+	int i;
+
+#ifdef DEBUG
+	BUG_ON(!out || !outlen || outlen > BLAKE2S_OUTBYTES);
+#endif
+	__blake2s_final(state);
+
+	if (__builtin_constant_p(outlen) && !(outlen % sizeof(u32))) {
+		if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) || IS_ALIGNED((unsigned long)out, __alignof__(u32))) {
+			__le32 *outwords = (__le32 *)out;
+
+			for (i = 0; i < outlen / sizeof(u32); ++i)
+				outwords[i] = cpu_to_le32(state->h[i]);
+		} else {
+			__le32 buffer[BLAKE2S_OUTBYTES];
+
+			for (i = 0; i < outlen / sizeof(u32); ++i)
+				buffer[i] = cpu_to_le32(state->h[i]);
+			memcpy(out, buffer, outlen);
+			memzero_explicit(buffer, sizeof(buffer));
+		}
+	} else {
+		u8 buffer[BLAKE2S_OUTBYTES] __aligned(__alignof__(u32));
+		__le32 *outwords = (__le32 *)buffer;
+
+		for (i = 0; i < 8; ++i)
+			outwords[i] = cpu_to_le32(state->h[i]);
+		memcpy(out, buffer, outlen);
+		memzero_explicit(buffer, sizeof(buffer));
+	}
+
+	memzero_explicit(state, sizeof(struct blake2s_state));
+}
+
+
+static inline void blake2s(u8 *out, const u8 *in, const u8 *key, const size_t outlen, size_t inlen, const size_t keylen)
+{
+	struct blake2s_state state;
+
+#ifdef DEBUG
+	BUG_ON((!in && inlen > 0) || !out || !outlen || outlen > BLAKE2S_OUTBYTES || keylen > BLAKE2S_KEYBYTES || (!key && keylen));
+#endif
+
+	if (keylen)
+		blake2s_init_key(&state, outlen, key, keylen);
+	else
+		blake2s_init(&state, outlen);
+
+	blake2s_update(&state, in, inlen);
+	blake2s_final(&state, out, outlen);
+}
+
+void blake2s_hmac(u8 *out, const u8 *in, const u8 *key, const size_t outlen, const size_t inlen, const size_t keylen);
+
+void blake2s_fpu_init(void);
+
+#ifdef DEBUG
+bool blake2s_selftest(void);
+#endif
+
+#endif /* _WG_BLAKE2S_H */
diff --git b/net/wireguard/crypto/chacha20-arm.S b/net/wireguard/crypto/chacha20-arm.S
new file mode 100644
index 0000000..ac7a9ea
--- /dev/null
+++ b/net/wireguard/crypto/chacha20-arm.S
@@ -0,0 +1,1471 @@
+/* SPDX-License-Identifier: OpenSSL OR (BSD-3-Clause OR GPL-2.0)
+ *
+ * Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ * Copyright 2016 The OpenSSL Project Authors. All Rights Reserved.
+ */
+
+#include <linux/linkage.h>
+
+.text
+#if defined(__thumb2__) || defined(__clang__)
+.syntax	unified
+#endif
+#if defined(__thumb2__)
+.thumb
+#else
+.code	32
+#endif
+
+#if defined(__thumb2__) || defined(__clang__)
+#define ldrhsb	ldrbhs
+#endif
+
+.align	5
+.Lsigma:
+.long	0x61707865,0x3320646e,0x79622d32,0x6b206574	@ endian-neutral
+.Lone:
+.long	1,0,0,0
+.word -1
+
+#if __LINUX_ARM_ARCH__ >= 7 && IS_ENABLED(CONFIG_KERNEL_MODE_NEON)
+.arch	armv7-a
+.fpu	neon
+
+.align	5
+ENTRY(chacha20_neon)
+	ldr		r12,[sp,#0]		@ pull pointer to counter and nonce
+	stmdb		sp!,{r0-r2,r4-r11,lr}
+	cmp		r2,#0			@ len==0?
+#ifdef	__thumb2__
+	itt		eq
+#endif
+	addeq		sp,sp,#4*3
+	beq		.Lno_data_neon
+	cmp		r2,#192			@ test len
+	bls		.Lshort
+.Lchacha20_neon_begin:
+	adr		r14,.Lsigma
+	vstmdb		sp!,{d8-d15}		@ ABI spec says so
+	stmdb		sp!,{r0-r3}
+
+	vld1.32		{q1-q2},[r3]		@ load key
+	ldmia		r3,{r4-r11}		@ load key
+
+	sub		sp,sp,#4*(16+16)
+	vld1.32		{q3},[r12]		@ load counter and nonce
+	add		r12,sp,#4*8
+	ldmia		r14,{r0-r3}		@ load sigma
+	vld1.32		{q0},[r14]!		@ load sigma
+	vld1.32		{q12},[r14]		@ one
+	vst1.32		{q2-q3},[r12]		@ copy 1/2key|counter|nonce
+	vst1.32		{q0-q1},[sp]		@ copy sigma|1/2key
+
+	str		r10,[sp,#4*(16+10)]	@ off-load "rx"
+	str		r11,[sp,#4*(16+11)]	@ off-load "rx"
+	vshl.i32	d26,d24,#1	@ two
+	vstr		d24,[sp,#4*(16+0)]
+	vshl.i32	d28,d24,#2	@ four
+	vstr		d26,[sp,#4*(16+2)]
+	vmov		q4,q0
+	vstr		d28,[sp,#4*(16+4)]
+	vmov		q8,q0
+	vmov		q5,q1
+	vmov		q9,q1
+	b		.Loop_neon_enter
+
+.align	4
+.Loop_neon_outer:
+	ldmia		sp,{r0-r9}		@ load key material
+	cmp		r11,#64*2		@ if len<=64*2
+	bls		.Lbreak_neon		@ switch to integer-only
+	vmov		q4,q0
+	str		r11,[sp,#4*(32+2)]	@ save len
+	vmov		q8,q0
+	str		r12,  [sp,#4*(32+1)]	@ save inp
+	vmov		q5,q1
+	str		r14,  [sp,#4*(32+0)]	@ save out
+	vmov		q9,q1
+.Loop_neon_enter:
+	ldr		r11, [sp,#4*(15)]
+	vadd.i32	q7,q3,q12		@ counter+1
+	ldr		r12,[sp,#4*(12)]	@ modulo-scheduled load
+	vmov		q6,q2
+	ldr		r10, [sp,#4*(13)]
+	vmov		q10,q2
+	ldr		r14,[sp,#4*(14)]
+	vadd.i32	q11,q7,q12		@ counter+2
+	str		r11, [sp,#4*(16+15)]
+	mov		r11,#10
+	add		r12,r12,#3	@ counter+3
+	b		.Loop_neon
+
+.align	4
+.Loop_neon:
+	subs		r11,r11,#1
+	vadd.i32	q0,q0,q1
+	add	r0,r0,r4
+	vadd.i32	q4,q4,q5
+	mov	r12,r12,ror#16
+	vadd.i32	q8,q8,q9
+	add	r1,r1,r5
+	veor	q3,q3,q0
+	mov	r10,r10,ror#16
+	veor	q7,q7,q4
+	eor	r12,r12,r0,ror#16
+	veor	q11,q11,q8
+	eor	r10,r10,r1,ror#16
+	vrev32.16	q3,q3
+	add	r8,r8,r12
+	vrev32.16	q7,q7
+	mov	r4,r4,ror#20
+	vrev32.16	q11,q11
+	add	r9,r9,r10
+	vadd.i32	q2,q2,q3
+	mov	r5,r5,ror#20
+	vadd.i32	q6,q6,q7
+	eor	r4,r4,r8,ror#20
+	vadd.i32	q10,q10,q11
+	eor	r5,r5,r9,ror#20
+	veor	q12,q1,q2
+	add	r0,r0,r4
+	veor	q13,q5,q6
+	mov	r12,r12,ror#24
+	veor	q14,q9,q10
+	add	r1,r1,r5
+	vshr.u32	q1,q12,#20
+	mov	r10,r10,ror#24
+	vshr.u32	q5,q13,#20
+	eor	r12,r12,r0,ror#24
+	vshr.u32	q9,q14,#20
+	eor	r10,r10,r1,ror#24
+	vsli.32	q1,q12,#12
+	add	r8,r8,r12
+	vsli.32	q5,q13,#12
+	mov	r4,r4,ror#25
+	vsli.32	q9,q14,#12
+	add	r9,r9,r10
+	vadd.i32	q0,q0,q1
+	mov	r5,r5,ror#25
+	vadd.i32	q4,q4,q5
+	str	r10,[sp,#4*(16+13)]
+	vadd.i32	q8,q8,q9
+	ldr	r10,[sp,#4*(16+15)]
+	veor	q12,q3,q0
+	eor	r4,r4,r8,ror#25
+	veor	q13,q7,q4
+	eor	r5,r5,r9,ror#25
+	veor	q14,q11,q8
+	str	r8,[sp,#4*(16+8)]
+	vshr.u32	q3,q12,#24
+	ldr	r8,[sp,#4*(16+10)]
+	vshr.u32	q7,q13,#24
+	add	r2,r2,r6
+	vshr.u32	q11,q14,#24
+	mov	r14,r14,ror#16
+	vsli.32	q3,q12,#8
+	str	r9,[sp,#4*(16+9)]
+	vsli.32	q7,q13,#8
+	ldr	r9,[sp,#4*(16+11)]
+	vsli.32	q11,q14,#8
+	add	r3,r3,r7
+	vadd.i32	q2,q2,q3
+	mov	r10,r10,ror#16
+	vadd.i32	q6,q6,q7
+	eor	r14,r14,r2,ror#16
+	vadd.i32	q10,q10,q11
+	eor	r10,r10,r3,ror#16
+	veor	q12,q1,q2
+	add	r8,r8,r14
+	veor	q13,q5,q6
+	mov	r6,r6,ror#20
+	veor	q14,q9,q10
+	add	r9,r9,r10
+	vshr.u32	q1,q12,#25
+	mov	r7,r7,ror#20
+	vshr.u32	q5,q13,#25
+	eor	r6,r6,r8,ror#20
+	vshr.u32	q9,q14,#25
+	eor	r7,r7,r9,ror#20
+	vsli.32	q1,q12,#7
+	add	r2,r2,r6
+	vsli.32	q5,q13,#7
+	mov	r14,r14,ror#24
+	vsli.32	q9,q14,#7
+	add	r3,r3,r7
+	vext.8	q2,q2,q2,#8
+	mov	r10,r10,ror#24
+	vext.8	q6,q6,q6,#8
+	eor	r14,r14,r2,ror#24
+	vext.8	q10,q10,q10,#8
+	eor	r10,r10,r3,ror#24
+	vext.8	q1,q1,q1,#4
+	add	r8,r8,r14
+	vext.8	q5,q5,q5,#4
+	mov	r6,r6,ror#25
+	vext.8	q9,q9,q9,#4
+	add	r9,r9,r10
+	vext.8	q3,q3,q3,#12
+	mov	r7,r7,ror#25
+	vext.8	q7,q7,q7,#12
+	eor	r6,r6,r8,ror#25
+	vext.8	q11,q11,q11,#12
+	eor	r7,r7,r9,ror#25
+	vadd.i32	q0,q0,q1
+	add	r0,r0,r5
+	vadd.i32	q4,q4,q5
+	mov	r10,r10,ror#16
+	vadd.i32	q8,q8,q9
+	add	r1,r1,r6
+	veor	q3,q3,q0
+	mov	r12,r12,ror#16
+	veor	q7,q7,q4
+	eor	r10,r10,r0,ror#16
+	veor	q11,q11,q8
+	eor	r12,r12,r1,ror#16
+	vrev32.16	q3,q3
+	add	r8,r8,r10
+	vrev32.16	q7,q7
+	mov	r5,r5,ror#20
+	vrev32.16	q11,q11
+	add	r9,r9,r12
+	vadd.i32	q2,q2,q3
+	mov	r6,r6,ror#20
+	vadd.i32	q6,q6,q7
+	eor	r5,r5,r8,ror#20
+	vadd.i32	q10,q10,q11
+	eor	r6,r6,r9,ror#20
+	veor	q12,q1,q2
+	add	r0,r0,r5
+	veor	q13,q5,q6
+	mov	r10,r10,ror#24
+	veor	q14,q9,q10
+	add	r1,r1,r6
+	vshr.u32	q1,q12,#20
+	mov	r12,r12,ror#24
+	vshr.u32	q5,q13,#20
+	eor	r10,r10,r0,ror#24
+	vshr.u32	q9,q14,#20
+	eor	r12,r12,r1,ror#24
+	vsli.32	q1,q12,#12
+	add	r8,r8,r10
+	vsli.32	q5,q13,#12
+	mov	r5,r5,ror#25
+	vsli.32	q9,q14,#12
+	str	r10,[sp,#4*(16+15)]
+	vadd.i32	q0,q0,q1
+	ldr	r10,[sp,#4*(16+13)]
+	vadd.i32	q4,q4,q5
+	add	r9,r9,r12
+	vadd.i32	q8,q8,q9
+	mov	r6,r6,ror#25
+	veor	q12,q3,q0
+	eor	r5,r5,r8,ror#25
+	veor	q13,q7,q4
+	eor	r6,r6,r9,ror#25
+	veor	q14,q11,q8
+	str	r8,[sp,#4*(16+10)]
+	vshr.u32	q3,q12,#24
+	ldr	r8,[sp,#4*(16+8)]
+	vshr.u32	q7,q13,#24
+	add	r2,r2,r7
+	vshr.u32	q11,q14,#24
+	mov	r10,r10,ror#16
+	vsli.32	q3,q12,#8
+	str	r9,[sp,#4*(16+11)]
+	vsli.32	q7,q13,#8
+	ldr	r9,[sp,#4*(16+9)]
+	vsli.32	q11,q14,#8
+	add	r3,r3,r4
+	vadd.i32	q2,q2,q3
+	mov	r14,r14,ror#16
+	vadd.i32	q6,q6,q7
+	eor	r10,r10,r2,ror#16
+	vadd.i32	q10,q10,q11
+	eor	r14,r14,r3,ror#16
+	veor	q12,q1,q2
+	add	r8,r8,r10
+	veor	q13,q5,q6
+	mov	r7,r7,ror#20
+	veor	q14,q9,q10
+	add	r9,r9,r14
+	vshr.u32	q1,q12,#25
+	mov	r4,r4,ror#20
+	vshr.u32	q5,q13,#25
+	eor	r7,r7,r8,ror#20
+	vshr.u32	q9,q14,#25
+	eor	r4,r4,r9,ror#20
+	vsli.32	q1,q12,#7
+	add	r2,r2,r7
+	vsli.32	q5,q13,#7
+	mov	r10,r10,ror#24
+	vsli.32	q9,q14,#7
+	add	r3,r3,r4
+	vext.8	q2,q2,q2,#8
+	mov	r14,r14,ror#24
+	vext.8	q6,q6,q6,#8
+	eor	r10,r10,r2,ror#24
+	vext.8	q10,q10,q10,#8
+	eor	r14,r14,r3,ror#24
+	vext.8	q1,q1,q1,#12
+	add	r8,r8,r10
+	vext.8	q5,q5,q5,#12
+	mov	r7,r7,ror#25
+	vext.8	q9,q9,q9,#12
+	add	r9,r9,r14
+	vext.8	q3,q3,q3,#4
+	mov	r4,r4,ror#25
+	vext.8	q7,q7,q7,#4
+	eor	r7,r7,r8,ror#25
+	vext.8	q11,q11,q11,#4
+	eor	r4,r4,r9,ror#25
+	bne		.Loop_neon
+
+	add		r11,sp,#32
+	vld1.32		{q12-q13},[sp]		@ load key material
+	vld1.32		{q14-q15},[r11]
+
+	ldr		r11,[sp,#4*(32+2)]	@ load len
+
+	str		r8, [sp,#4*(16+8)]	@ modulo-scheduled store
+	str		r9, [sp,#4*(16+9)]
+	str		r12,[sp,#4*(16+12)]
+	str		r10, [sp,#4*(16+13)]
+	str		r14,[sp,#4*(16+14)]
+
+	@ at this point we have first half of 512-bit result in
+	@ rx and second half at sp+4*(16+8)
+
+	ldr		r12,[sp,#4*(32+1)]	@ load inp
+	ldr		r14,[sp,#4*(32+0)]	@ load out
+
+	vadd.i32	q0,q0,q12		@ accumulate key material
+	vadd.i32	q4,q4,q12
+	vadd.i32	q8,q8,q12
+	vldr		d24,[sp,#4*(16+0)]	@ one
+
+	vadd.i32	q1,q1,q13
+	vadd.i32	q5,q5,q13
+	vadd.i32	q9,q9,q13
+	vldr		d26,[sp,#4*(16+2)]	@ two
+
+	vadd.i32	q2,q2,q14
+	vadd.i32	q6,q6,q14
+	vadd.i32	q10,q10,q14
+	vadd.i32	d14,d14,d24	@ counter+1
+	vadd.i32	d22,d22,d26	@ counter+2
+
+	vadd.i32	q3,q3,q15
+	vadd.i32	q7,q7,q15
+	vadd.i32	q11,q11,q15
+
+	cmp		r11,#64*4
+	blo		.Ltail_neon
+
+	vld1.8		{q12-q13},[r12]!	@ load input
+	 mov		r11,sp
+	vld1.8		{q14-q15},[r12]!
+	veor		q0,q0,q12		@ xor with input
+	veor		q1,q1,q13
+	vld1.8		{q12-q13},[r12]!
+	veor		q2,q2,q14
+	veor		q3,q3,q15
+	vld1.8		{q14-q15},[r12]!
+
+	veor		q4,q4,q12
+	 vst1.8		{q0-q1},[r14]!	@ store output
+	veor		q5,q5,q13
+	vld1.8		{q12-q13},[r12]!
+	veor		q6,q6,q14
+	 vst1.8		{q2-q3},[r14]!
+	veor		q7,q7,q15
+	vld1.8		{q14-q15},[r12]!
+
+	veor		q8,q8,q12
+	 vld1.32	{q0-q1},[r11]!	@ load for next iteration
+	 veor		d25,d25,d25
+	 vldr		d24,[sp,#4*(16+4)]	@ four
+	veor		q9,q9,q13
+	 vld1.32	{q2-q3},[r11]
+	veor		q10,q10,q14
+	 vst1.8		{q4-q5},[r14]!
+	veor		q11,q11,q15
+	 vst1.8		{q6-q7},[r14]!
+
+	vadd.i32	d6,d6,d24	@ next counter value
+	vldr		d24,[sp,#4*(16+0)]	@ one
+
+	ldmia		sp,{r8-r11}	@ load key material
+	add		r0,r0,r8	@ accumulate key material
+	ldr		r8,[r12],#16		@ load input
+	 vst1.8		{q8-q9},[r14]!
+	add		r1,r1,r9
+	ldr		r9,[r12,#-12]
+	 vst1.8		{q10-q11},[r14]!
+	add		r2,r2,r10
+	ldr		r10,[r12,#-8]
+	add		r3,r3,r11
+	ldr		r11,[r12,#-4]
+#ifdef	__ARMEB__
+	rev		r0,r0
+	rev		r1,r1
+	rev		r2,r2
+	rev		r3,r3
+#endif
+	eor		r0,r0,r8	@ xor with input
+	 add		r8,sp,#4*(4)
+	eor		r1,r1,r9
+	str		r0,[r14],#16		@ store output
+	eor		r2,r2,r10
+	str		r1,[r14,#-12]
+	eor		r3,r3,r11
+	 ldmia		r8,{r8-r11}	@ load key material
+	str		r2,[r14,#-8]
+	str		r3,[r14,#-4]
+
+	add		r4,r4,r8	@ accumulate key material
+	ldr		r8,[r12],#16		@ load input
+	add		r5,r5,r9
+	ldr		r9,[r12,#-12]
+	add		r6,r6,r10
+	ldr		r10,[r12,#-8]
+	add		r7,r7,r11
+	ldr		r11,[r12,#-4]
+#ifdef	__ARMEB__
+	rev		r4,r4
+	rev		r5,r5
+	rev		r6,r6
+	rev		r7,r7
+#endif
+	eor		r4,r4,r8
+	 add		r8,sp,#4*(8)
+	eor		r5,r5,r9
+	str		r4,[r14],#16		@ store output
+	eor		r6,r6,r10
+	str		r5,[r14,#-12]
+	eor		r7,r7,r11
+	 ldmia		r8,{r8-r11}	@ load key material
+	str		r6,[r14,#-8]
+	 add		r0,sp,#4*(16+8)
+	str		r7,[r14,#-4]
+
+	ldmia		r0,{r0-r7}	@ load second half
+
+	add		r0,r0,r8	@ accumulate key material
+	ldr		r8,[r12],#16		@ load input
+	add		r1,r1,r9
+	ldr		r9,[r12,#-12]
+#ifdef	__thumb2__
+	it	hi
+#endif
+	 strhi		r10,[sp,#4*(16+10)]	@ copy "rx" while at it
+	add		r2,r2,r10
+	ldr		r10,[r12,#-8]
+#ifdef	__thumb2__
+	it	hi
+#endif
+	 strhi		r11,[sp,#4*(16+11)]	@ copy "rx" while at it
+	add		r3,r3,r11
+	ldr		r11,[r12,#-4]
+#ifdef	__ARMEB__
+	rev		r0,r0
+	rev		r1,r1
+	rev		r2,r2
+	rev		r3,r3
+#endif
+	eor		r0,r0,r8
+	 add		r8,sp,#4*(12)
+	eor		r1,r1,r9
+	str		r0,[r14],#16		@ store output
+	eor		r2,r2,r10
+	str		r1,[r14,#-12]
+	eor		r3,r3,r11
+	 ldmia		r8,{r8-r11}	@ load key material
+	str		r2,[r14,#-8]
+	str		r3,[r14,#-4]
+
+	add		r4,r4,r8	@ accumulate key material
+	 add		r8,r8,#4		@ next counter value
+	add		r5,r5,r9
+	 str		r8,[sp,#4*(12)]	@ save next counter value
+	ldr		r8,[r12],#16		@ load input
+	add		r6,r6,r10
+	 add		r4,r4,#3		@ counter+3
+	ldr		r9,[r12,#-12]
+	add		r7,r7,r11
+	ldr		r10,[r12,#-8]
+	ldr		r11,[r12,#-4]
+#ifdef	__ARMEB__
+	rev		r4,r4
+	rev		r5,r5
+	rev		r6,r6
+	rev		r7,r7
+#endif
+	eor		r4,r4,r8
+#ifdef	__thumb2__
+	it	hi
+#endif
+	 ldrhi		r8,[sp,#4*(32+2)]	@ re-load len
+	eor		r5,r5,r9
+	eor		r6,r6,r10
+	str		r4,[r14],#16		@ store output
+	eor		r7,r7,r11
+	str		r5,[r14,#-12]
+	 sub		r11,r8,#64*4	@ len-=64*4
+	str		r6,[r14,#-8]
+	str		r7,[r14,#-4]
+	bhi		.Loop_neon_outer
+
+	b		.Ldone_neon
+
+.align	4
+.Lbreak_neon:
+	@ harmonize NEON and integer-only stack frames: load data
+	@ from NEON frame, but save to integer-only one; distance
+	@ between the two is 4*(32+4+16-32)=4*(20).
+
+	str		r11, [sp,#4*(20+32+2)]	@ save len
+	 add		r11,sp,#4*(32+4)
+	str		r12,   [sp,#4*(20+32+1)]	@ save inp
+	str		r14,   [sp,#4*(20+32+0)]	@ save out
+
+	ldr		r12,[sp,#4*(16+10)]
+	ldr		r14,[sp,#4*(16+11)]
+	 vldmia		r11,{d8-d15}			@ fulfill ABI requirement
+	str		r12,[sp,#4*(20+16+10)]	@ copy "rx"
+	str		r14,[sp,#4*(20+16+11)]	@ copy "rx"
+
+	ldr		r11, [sp,#4*(15)]
+	ldr		r12,[sp,#4*(12)]		@ modulo-scheduled load
+	ldr		r10, [sp,#4*(13)]
+	ldr		r14,[sp,#4*(14)]
+	str		r11, [sp,#4*(20+16+15)]
+	add		r11,sp,#4*(20)
+	vst1.32		{q0-q1},[r11]!		@ copy key
+	add		sp,sp,#4*(20)			@ switch frame
+	vst1.32		{q2-q3},[r11]
+	mov		r11,#10
+	b		.Loop				@ go integer-only
+
+.align	4
+.Ltail_neon:
+	cmp		r11,#64*3
+	bhs		.L192_or_more_neon
+	cmp		r11,#64*2
+	bhs		.L128_or_more_neon
+	cmp		r11,#64*1
+	bhs		.L64_or_more_neon
+
+	add		r8,sp,#4*(8)
+	vst1.8		{q0-q1},[sp]
+	add		r10,sp,#4*(0)
+	vst1.8		{q2-q3},[r8]
+	b		.Loop_tail_neon
+
+.align	4
+.L64_or_more_neon:
+	vld1.8		{q12-q13},[r12]!
+	vld1.8		{q14-q15},[r12]!
+	veor		q0,q0,q12
+	veor		q1,q1,q13
+	veor		q2,q2,q14
+	veor		q3,q3,q15
+	vst1.8		{q0-q1},[r14]!
+	vst1.8		{q2-q3},[r14]!
+
+	beq		.Ldone_neon
+
+	add		r8,sp,#4*(8)
+	vst1.8		{q4-q5},[sp]
+	add		r10,sp,#4*(0)
+	vst1.8		{q6-q7},[r8]
+	sub		r11,r11,#64*1	@ len-=64*1
+	b		.Loop_tail_neon
+
+.align	4
+.L128_or_more_neon:
+	vld1.8		{q12-q13},[r12]!
+	vld1.8		{q14-q15},[r12]!
+	veor		q0,q0,q12
+	veor		q1,q1,q13
+	vld1.8		{q12-q13},[r12]!
+	veor		q2,q2,q14
+	veor		q3,q3,q15
+	vld1.8		{q14-q15},[r12]!
+
+	veor		q4,q4,q12
+	veor		q5,q5,q13
+	 vst1.8		{q0-q1},[r14]!
+	veor		q6,q6,q14
+	 vst1.8		{q2-q3},[r14]!
+	veor		q7,q7,q15
+	vst1.8		{q4-q5},[r14]!
+	vst1.8		{q6-q7},[r14]!
+
+	beq		.Ldone_neon
+
+	add		r8,sp,#4*(8)
+	vst1.8		{q8-q9},[sp]
+	add		r10,sp,#4*(0)
+	vst1.8		{q10-q11},[r8]
+	sub		r11,r11,#64*2	@ len-=64*2
+	b		.Loop_tail_neon
+
+.align	4
+.L192_or_more_neon:
+	vld1.8		{q12-q13},[r12]!
+	vld1.8		{q14-q15},[r12]!
+	veor		q0,q0,q12
+	veor		q1,q1,q13
+	vld1.8		{q12-q13},[r12]!
+	veor		q2,q2,q14
+	veor		q3,q3,q15
+	vld1.8		{q14-q15},[r12]!
+
+	veor		q4,q4,q12
+	veor		q5,q5,q13
+	vld1.8		{q12-q13},[r12]!
+	veor		q6,q6,q14
+	 vst1.8		{q0-q1},[r14]!
+	veor		q7,q7,q15
+	vld1.8		{q14-q15},[r12]!
+
+	veor		q8,q8,q12
+	 vst1.8		{q2-q3},[r14]!
+	veor		q9,q9,q13
+	 vst1.8		{q4-q5},[r14]!
+	veor		q10,q10,q14
+	 vst1.8		{q6-q7},[r14]!
+	veor		q11,q11,q15
+	vst1.8		{q8-q9},[r14]!
+	vst1.8		{q10-q11},[r14]!
+
+	beq		.Ldone_neon
+
+	ldmia		sp,{r8-r11}	@ load key material
+	add		r0,r0,r8	@ accumulate key material
+	 add		r8,sp,#4*(4)
+	add		r1,r1,r9
+	add		r2,r2,r10
+	add		r3,r3,r11
+	 ldmia		r8,{r8-r11}	@ load key material
+
+	add		r4,r4,r8	@ accumulate key material
+	 add		r8,sp,#4*(8)
+	add		r5,r5,r9
+	add		r6,r6,r10
+	add		r7,r7,r11
+	 ldmia		r8,{r8-r11}	@ load key material
+#ifdef	__ARMEB__
+	rev		r0,r0
+	rev		r1,r1
+	rev		r2,r2
+	rev		r3,r3
+	rev		r4,r4
+	rev		r5,r5
+	rev		r6,r6
+	rev		r7,r7
+#endif
+	stmia		sp,{r0-r7}
+	 add		r0,sp,#4*(16+8)
+
+	ldmia		r0,{r0-r7}	@ load second half
+
+	add		r0,r0,r8	@ accumulate key material
+	 add		r8,sp,#4*(12)
+	add		r1,r1,r9
+	add		r2,r2,r10
+	add		r3,r3,r11
+	 ldmia		r8,{r8-r11}	@ load key material
+
+	add		r4,r4,r8	@ accumulate key material
+	 add		r8,sp,#4*(8)
+	add		r5,r5,r9
+	 add		r4,r4,#3		@ counter+3
+	add		r6,r6,r10
+	add		r7,r7,r11
+	 ldr		r11,[sp,#4*(32+2)]	@ re-load len
+#ifdef	__ARMEB__
+	rev		r0,r0
+	rev		r1,r1
+	rev		r2,r2
+	rev		r3,r3
+	rev		r4,r4
+	rev		r5,r5
+	rev		r6,r6
+	rev		r7,r7
+#endif
+	stmia		r8,{r0-r7}
+	 add		r10,sp,#4*(0)
+	 sub		r11,r11,#64*3	@ len-=64*3
+
+.Loop_tail_neon:
+	ldrb		r8,[r10],#1	@ read buffer on stack
+	ldrb		r9,[r12],#1		@ read input
+	subs		r11,r11,#1
+	eor		r8,r8,r9
+	strb		r8,[r14],#1		@ store output
+	bne		.Loop_tail_neon
+
+.Ldone_neon:
+	add		sp,sp,#4*(32+4)
+	vldmia		sp,{d8-d15}
+	add		sp,sp,#4*(16+3)
+.Lno_data_neon:
+	ldmia		sp!,{r4-r11,pc}
+ENDPROC(chacha20_neon)
+#endif
+
+.align	5
+.Lsigma2:
+.long	0x61707865,0x3320646e,0x79622d32,0x6b206574	@ endian-neutral
+.Lone2:
+.long	1,0,0,0
+.word -1
+
+.align	5
+ENTRY(chacha20_arm)
+	ldr	r12,[sp,#0]		@ pull pointer to counter and nonce
+	stmdb	sp!,{r0-r2,r4-r11,lr}
+	cmp	r2,#0			@ len==0?
+#ifdef	__thumb2__
+	itt	eq
+#endif
+	addeq	sp,sp,#4*3
+	beq	.Lno_data_arm
+.Lshort:
+	ldmia	r12,{r4-r7}		@ load counter and nonce
+	sub	sp,sp,#4*(16)		@ off-load area
+#if __LINUX_ARM_ARCH__ < 7 && !defined(__thumb2__)
+	sub	r14,pc,#100		@ .Lsigma2
+#else
+	adr	r14,.Lsigma2		@ .Lsigma2
+#endif
+	stmdb	sp!,{r4-r7}		@ copy counter and nonce
+	ldmia	r3,{r4-r11}		@ load key
+	ldmia	r14,{r0-r3}		@ load sigma
+	stmdb	sp!,{r4-r11}		@ copy key
+	stmdb	sp!,{r0-r3}		@ copy sigma
+	str	r10,[sp,#4*(16+10)]	@ off-load "rx"
+	str	r11,[sp,#4*(16+11)]	@ off-load "rx"
+	b	.Loop_outer_enter
+
+.align	4
+.Loop_outer:
+	ldmia	sp,{r0-r9}		@ load key material
+	str	r11,[sp,#4*(32+2)]	@ save len
+	str	r12,  [sp,#4*(32+1)]	@ save inp
+	str	r14,  [sp,#4*(32+0)]	@ save out
+.Loop_outer_enter:
+	ldr	r11, [sp,#4*(15)]
+	ldr	r12,[sp,#4*(12)]	@ modulo-scheduled load
+	ldr	r10, [sp,#4*(13)]
+	ldr	r14,[sp,#4*(14)]
+	str	r11, [sp,#4*(16+15)]
+	mov	r11,#10
+	b	.Loop
+
+.align	4
+.Loop:
+	subs	r11,r11,#1
+	add	r0,r0,r4
+	mov	r12,r12,ror#16
+	add	r1,r1,r5
+	mov	r10,r10,ror#16
+	eor	r12,r12,r0,ror#16
+	eor	r10,r10,r1,ror#16
+	add	r8,r8,r12
+	mov	r4,r4,ror#20
+	add	r9,r9,r10
+	mov	r5,r5,ror#20
+	eor	r4,r4,r8,ror#20
+	eor	r5,r5,r9,ror#20
+	add	r0,r0,r4
+	mov	r12,r12,ror#24
+	add	r1,r1,r5
+	mov	r10,r10,ror#24
+	eor	r12,r12,r0,ror#24
+	eor	r10,r10,r1,ror#24
+	add	r8,r8,r12
+	mov	r4,r4,ror#25
+	add	r9,r9,r10
+	mov	r5,r5,ror#25
+	str	r10,[sp,#4*(16+13)]
+	ldr	r10,[sp,#4*(16+15)]
+	eor	r4,r4,r8,ror#25
+	eor	r5,r5,r9,ror#25
+	str	r8,[sp,#4*(16+8)]
+	ldr	r8,[sp,#4*(16+10)]
+	add	r2,r2,r6
+	mov	r14,r14,ror#16
+	str	r9,[sp,#4*(16+9)]
+	ldr	r9,[sp,#4*(16+11)]
+	add	r3,r3,r7
+	mov	r10,r10,ror#16
+	eor	r14,r14,r2,ror#16
+	eor	r10,r10,r3,ror#16
+	add	r8,r8,r14
+	mov	r6,r6,ror#20
+	add	r9,r9,r10
+	mov	r7,r7,ror#20
+	eor	r6,r6,r8,ror#20
+	eor	r7,r7,r9,ror#20
+	add	r2,r2,r6
+	mov	r14,r14,ror#24
+	add	r3,r3,r7
+	mov	r10,r10,ror#24
+	eor	r14,r14,r2,ror#24
+	eor	r10,r10,r3,ror#24
+	add	r8,r8,r14
+	mov	r6,r6,ror#25
+	add	r9,r9,r10
+	mov	r7,r7,ror#25
+	eor	r6,r6,r8,ror#25
+	eor	r7,r7,r9,ror#25
+	add	r0,r0,r5
+	mov	r10,r10,ror#16
+	add	r1,r1,r6
+	mov	r12,r12,ror#16
+	eor	r10,r10,r0,ror#16
+	eor	r12,r12,r1,ror#16
+	add	r8,r8,r10
+	mov	r5,r5,ror#20
+	add	r9,r9,r12
+	mov	r6,r6,ror#20
+	eor	r5,r5,r8,ror#20
+	eor	r6,r6,r9,ror#20
+	add	r0,r0,r5
+	mov	r10,r10,ror#24
+	add	r1,r1,r6
+	mov	r12,r12,ror#24
+	eor	r10,r10,r0,ror#24
+	eor	r12,r12,r1,ror#24
+	add	r8,r8,r10
+	mov	r5,r5,ror#25
+	str	r10,[sp,#4*(16+15)]
+	ldr	r10,[sp,#4*(16+13)]
+	add	r9,r9,r12
+	mov	r6,r6,ror#25
+	eor	r5,r5,r8,ror#25
+	eor	r6,r6,r9,ror#25
+	str	r8,[sp,#4*(16+10)]
+	ldr	r8,[sp,#4*(16+8)]
+	add	r2,r2,r7
+	mov	r10,r10,ror#16
+	str	r9,[sp,#4*(16+11)]
+	ldr	r9,[sp,#4*(16+9)]
+	add	r3,r3,r4
+	mov	r14,r14,ror#16
+	eor	r10,r10,r2,ror#16
+	eor	r14,r14,r3,ror#16
+	add	r8,r8,r10
+	mov	r7,r7,ror#20
+	add	r9,r9,r14
+	mov	r4,r4,ror#20
+	eor	r7,r7,r8,ror#20
+	eor	r4,r4,r9,ror#20
+	add	r2,r2,r7
+	mov	r10,r10,ror#24
+	add	r3,r3,r4
+	mov	r14,r14,ror#24
+	eor	r10,r10,r2,ror#24
+	eor	r14,r14,r3,ror#24
+	add	r8,r8,r10
+	mov	r7,r7,ror#25
+	add	r9,r9,r14
+	mov	r4,r4,ror#25
+	eor	r7,r7,r8,ror#25
+	eor	r4,r4,r9,ror#25
+	bne	.Loop
+
+	ldr	r11,[sp,#4*(32+2)]	@ load len
+
+	str	r8, [sp,#4*(16+8)]	@ modulo-scheduled store
+	str	r9, [sp,#4*(16+9)]
+	str	r12,[sp,#4*(16+12)]
+	str	r10, [sp,#4*(16+13)]
+	str	r14,[sp,#4*(16+14)]
+
+	@ at this point we have first half of 512-bit result in
+	@ rx and second half at sp+4*(16+8)
+
+	cmp	r11,#64		@ done yet?
+#ifdef	__thumb2__
+	itete	lo
+#endif
+	addlo	r12,sp,#4*(0)		@ shortcut or ...
+	ldrhs	r12,[sp,#4*(32+1)]	@ ... load inp
+	addlo	r14,sp,#4*(0)		@ shortcut or ...
+	ldrhs	r14,[sp,#4*(32+0)]	@ ... load out
+
+	ldr	r8,[sp,#4*(0)]	@ load key material
+	ldr	r9,[sp,#4*(1)]
+
+#if __LINUX_ARM_ARCH__ >= 6 || !defined(__ARMEB__)
+#if __LINUX_ARM_ARCH__ < 7
+	orr	r10,r12,r14
+	tst	r10,#3		@ are input and output aligned?
+	ldr	r10,[sp,#4*(2)]
+	bne	.Lunaligned
+	cmp	r11,#64		@ restore flags
+#else
+	ldr	r10,[sp,#4*(2)]
+#endif
+	ldr	r11,[sp,#4*(3)]
+
+	add	r0,r0,r8	@ accumulate key material
+	add	r1,r1,r9
+#ifdef	__thumb2__
+	itt	hs
+#endif
+	ldrhs	r8,[r12],#16		@ load input
+	ldrhs	r9,[r12,#-12]
+
+	add	r2,r2,r10
+	add	r3,r3,r11
+#ifdef	__thumb2__
+	itt	hs
+#endif
+	ldrhs	r10,[r12,#-8]
+	ldrhs	r11,[r12,#-4]
+#if __LINUX_ARM_ARCH__ >= 6 && defined(__ARMEB__)
+	rev	r0,r0
+	rev	r1,r1
+	rev	r2,r2
+	rev	r3,r3
+#endif
+#ifdef	__thumb2__
+	itt	hs
+#endif
+	eorhs	r0,r0,r8	@ xor with input
+	eorhs	r1,r1,r9
+	 add	r8,sp,#4*(4)
+	str	r0,[r14],#16		@ store output
+#ifdef	__thumb2__
+	itt	hs
+#endif
+	eorhs	r2,r2,r10
+	eorhs	r3,r3,r11
+	 ldmia	r8,{r8-r11}	@ load key material
+	str	r1,[r14,#-12]
+	str	r2,[r14,#-8]
+	str	r3,[r14,#-4]
+
+	add	r4,r4,r8	@ accumulate key material
+	add	r5,r5,r9
+#ifdef	__thumb2__
+	itt	hs
+#endif
+	ldrhs	r8,[r12],#16		@ load input
+	ldrhs	r9,[r12,#-12]
+	add	r6,r6,r10
+	add	r7,r7,r11
+#ifdef	__thumb2__
+	itt	hs
+#endif
+	ldrhs	r10,[r12,#-8]
+	ldrhs	r11,[r12,#-4]
+#if __LINUX_ARM_ARCH__ >= 6 && defined(__ARMEB__)
+	rev	r4,r4
+	rev	r5,r5
+	rev	r6,r6
+	rev	r7,r7
+#endif
+#ifdef	__thumb2__
+	itt	hs
+#endif
+	eorhs	r4,r4,r8
+	eorhs	r5,r5,r9
+	 add	r8,sp,#4*(8)
+	str	r4,[r14],#16		@ store output
+#ifdef	__thumb2__
+	itt	hs
+#endif
+	eorhs	r6,r6,r10
+	eorhs	r7,r7,r11
+	str	r5,[r14,#-12]
+	 ldmia	r8,{r8-r11}	@ load key material
+	str	r6,[r14,#-8]
+	 add	r0,sp,#4*(16+8)
+	str	r7,[r14,#-4]
+
+	ldmia	r0,{r0-r7}	@ load second half
+
+	add	r0,r0,r8	@ accumulate key material
+	add	r1,r1,r9
+#ifdef	__thumb2__
+	itt	hs
+#endif
+	ldrhs	r8,[r12],#16		@ load input
+	ldrhs	r9,[r12,#-12]
+#ifdef	__thumb2__
+	itt	hi
+#endif
+	 strhi	r10,[sp,#4*(16+10)]	@ copy "rx" while at it
+	 strhi	r11,[sp,#4*(16+11)]	@ copy "rx" while at it
+	add	r2,r2,r10
+	add	r3,r3,r11
+#ifdef	__thumb2__
+	itt	hs
+#endif
+	ldrhs	r10,[r12,#-8]
+	ldrhs	r11,[r12,#-4]
+#if __LINUX_ARM_ARCH__ >= 6 && defined(__ARMEB__)
+	rev	r0,r0
+	rev	r1,r1
+	rev	r2,r2
+	rev	r3,r3
+#endif
+#ifdef	__thumb2__
+	itt	hs
+#endif
+	eorhs	r0,r0,r8
+	eorhs	r1,r1,r9
+	 add	r8,sp,#4*(12)
+	str	r0,[r14],#16		@ store output
+#ifdef	__thumb2__
+	itt	hs
+#endif
+	eorhs	r2,r2,r10
+	eorhs	r3,r3,r11
+	str	r1,[r14,#-12]
+	 ldmia	r8,{r8-r11}	@ load key material
+	str	r2,[r14,#-8]
+	str	r3,[r14,#-4]
+
+	add	r4,r4,r8	@ accumulate key material
+	add	r5,r5,r9
+#ifdef	__thumb2__
+	itt	hi
+#endif
+	 addhi	r8,r8,#1		@ next counter value
+	 strhi	r8,[sp,#4*(12)]	@ save next counter value
+#ifdef	__thumb2__
+	itt	hs
+#endif
+	ldrhs	r8,[r12],#16		@ load input
+	ldrhs	r9,[r12,#-12]
+	add	r6,r6,r10
+	add	r7,r7,r11
+#ifdef	__thumb2__
+	itt	hs
+#endif
+	ldrhs	r10,[r12,#-8]
+	ldrhs	r11,[r12,#-4]
+#if __LINUX_ARM_ARCH__ >= 6 && defined(__ARMEB__)
+	rev	r4,r4
+	rev	r5,r5
+	rev	r6,r6
+	rev	r7,r7
+#endif
+#ifdef	__thumb2__
+	itt	hs
+#endif
+	eorhs	r4,r4,r8
+	eorhs	r5,r5,r9
+#ifdef	__thumb2__
+	 it	ne
+#endif
+	 ldrne	r8,[sp,#4*(32+2)]	@ re-load len
+#ifdef	__thumb2__
+	itt	hs
+#endif
+	eorhs	r6,r6,r10
+	eorhs	r7,r7,r11
+	str	r4,[r14],#16		@ store output
+	str	r5,[r14,#-12]
+#ifdef	__thumb2__
+	it	hs
+#endif
+	 subhs	r11,r8,#64		@ len-=64
+	str	r6,[r14,#-8]
+	str	r7,[r14,#-4]
+	bhi	.Loop_outer
+
+	beq	.Ldone
+#if __LINUX_ARM_ARCH__ < 7
+	b	.Ltail
+
+.align	4
+.Lunaligned:				@ unaligned endian-neutral path
+	cmp	r11,#64		@ restore flags
+#endif
+#endif
+#if __LINUX_ARM_ARCH__ < 7
+	ldr	r11,[sp,#4*(3)]
+	add	r0,r0,r8		@ accumulate key material
+	add	r1,r1,r9
+	add	r2,r2,r10
+#ifdef	__thumb2__
+	itete	lo
+#endif
+	eorlo	r8,r8,r8		@ zero or ...
+	ldrhsb	r8,[r12],#16			@ ... load input
+	eorlo	r9,r9,r9
+	ldrhsb	r9,[r12,#-12]
+
+	add	r3,r3,r11
+#ifdef	__thumb2__
+	itete	lo
+#endif
+	eorlo	r10,r10,r10
+	ldrhsb	r10,[r12,#-8]
+	eorlo	r11,r11,r11
+	ldrhsb	r11,[r12,#-4]
+
+	eor	r0,r8,r0		@ xor with input (or zero)
+	eor	r1,r9,r1
+#ifdef	__thumb2__
+	itt	hs
+#endif
+	ldrhsb	r8,[r12,#-15]		@ load more input
+	ldrhsb	r9,[r12,#-11]
+	eor	r2,r10,r2
+	 strb	r0,[r14],#16		@ store output
+	eor	r3,r11,r3
+#ifdef	__thumb2__
+	itt	hs
+#endif
+	ldrhsb	r10,[r12,#-7]
+	ldrhsb	r11,[r12,#-3]
+	 strb	r1,[r14,#-12]
+	eor	r0,r8,r0,lsr#8
+	 strb	r2,[r14,#-8]
+	eor	r1,r9,r1,lsr#8
+#ifdef	__thumb2__
+	itt	hs
+#endif
+	ldrhsb	r8,[r12,#-14]		@ load more input
+	ldrhsb	r9,[r12,#-10]
+	 strb	r3,[r14,#-4]
+	eor	r2,r10,r2,lsr#8
+	 strb	r0,[r14,#-15]
+	eor	r3,r11,r3,lsr#8
+#ifdef	__thumb2__
+	itt	hs
+#endif
+	ldrhsb	r10,[r12,#-6]
+	ldrhsb	r11,[r12,#-2]
+	 strb	r1,[r14,#-11]
+	eor	r0,r8,r0,lsr#8
+	 strb	r2,[r14,#-7]
+	eor	r1,r9,r1,lsr#8
+#ifdef	__thumb2__
+	itt	hs
+#endif
+	ldrhsb	r8,[r12,#-13]		@ load more input
+	ldrhsb	r9,[r12,#-9]
+	 strb	r3,[r14,#-3]
+	eor	r2,r10,r2,lsr#8
+	 strb	r0,[r14,#-14]
+	eor	r3,r11,r3,lsr#8
+#ifdef	__thumb2__
+	itt	hs
+#endif
+	ldrhsb	r10,[r12,#-5]
+	ldrhsb	r11,[r12,#-1]
+	 strb	r1,[r14,#-10]
+	 strb	r2,[r14,#-6]
+	eor	r0,r8,r0,lsr#8
+	 strb	r3,[r14,#-2]
+	eor	r1,r9,r1,lsr#8
+	 strb	r0,[r14,#-13]
+	eor	r2,r10,r2,lsr#8
+	 strb	r1,[r14,#-9]
+	eor	r3,r11,r3,lsr#8
+	 strb	r2,[r14,#-5]
+	 strb	r3,[r14,#-1]
+	add	r8,sp,#4*(4+0)
+	ldmia	r8,{r8-r11}		@ load key material
+	add	r0,sp,#4*(16+8)
+	add	r4,r4,r8		@ accumulate key material
+	add	r5,r5,r9
+	add	r6,r6,r10
+#ifdef	__thumb2__
+	itete	lo
+#endif
+	eorlo	r8,r8,r8		@ zero or ...
+	ldrhsb	r8,[r12],#16			@ ... load input
+	eorlo	r9,r9,r9
+	ldrhsb	r9,[r12,#-12]
+
+	add	r7,r7,r11
+#ifdef	__thumb2__
+	itete	lo
+#endif
+	eorlo	r10,r10,r10
+	ldrhsb	r10,[r12,#-8]
+	eorlo	r11,r11,r11
+	ldrhsb	r11,[r12,#-4]
+
+	eor	r4,r8,r4		@ xor with input (or zero)
+	eor	r5,r9,r5
+#ifdef	__thumb2__
+	itt	hs
+#endif
+	ldrhsb	r8,[r12,#-15]		@ load more input
+	ldrhsb	r9,[r12,#-11]
+	eor	r6,r10,r6
+	 strb	r4,[r14],#16		@ store output
+	eor	r7,r11,r7
+#ifdef	__thumb2__
+	itt	hs
+#endif
+	ldrhsb	r10,[r12,#-7]
+	ldrhsb	r11,[r12,#-3]
+	 strb	r5,[r14,#-12]
+	eor	r4,r8,r4,lsr#8
+	 strb	r6,[r14,#-8]
+	eor	r5,r9,r5,lsr#8
+#ifdef	__thumb2__
+	itt	hs
+#endif
+	ldrhsb	r8,[r12,#-14]		@ load more input
+	ldrhsb	r9,[r12,#-10]
+	 strb	r7,[r14,#-4]
+	eor	r6,r10,r6,lsr#8
+	 strb	r4,[r14,#-15]
+	eor	r7,r11,r7,lsr#8
+#ifdef	__thumb2__
+	itt	hs
+#endif
+	ldrhsb	r10,[r12,#-6]
+	ldrhsb	r11,[r12,#-2]
+	 strb	r5,[r14,#-11]
+	eor	r4,r8,r4,lsr#8
+	 strb	r6,[r14,#-7]
+	eor	r5,r9,r5,lsr#8
+#ifdef	__thumb2__
+	itt	hs
+#endif
+	ldrhsb	r8,[r12,#-13]		@ load more input
+	ldrhsb	r9,[r12,#-9]
+	 strb	r7,[r14,#-3]
+	eor	r6,r10,r6,lsr#8
+	 strb	r4,[r14,#-14]
+	eor	r7,r11,r7,lsr#8
+#ifdef	__thumb2__
+	itt	hs
+#endif
+	ldrhsb	r10,[r12,#-5]
+	ldrhsb	r11,[r12,#-1]
+	 strb	r5,[r14,#-10]
+	 strb	r6,[r14,#-6]
+	eor	r4,r8,r4,lsr#8
+	 strb	r7,[r14,#-2]
+	eor	r5,r9,r5,lsr#8
+	 strb	r4,[r14,#-13]
+	eor	r6,r10,r6,lsr#8
+	 strb	r5,[r14,#-9]
+	eor	r7,r11,r7,lsr#8
+	 strb	r6,[r14,#-5]
+	 strb	r7,[r14,#-1]
+	add	r8,sp,#4*(4+4)
+	ldmia	r8,{r8-r11}		@ load key material
+	ldmia	r0,{r0-r7}		@ load second half
+#ifdef	__thumb2__
+	itt	hi
+#endif
+	strhi	r10,[sp,#4*(16+10)]		@ copy "rx"
+	strhi	r11,[sp,#4*(16+11)]		@ copy "rx"
+	add	r0,r0,r8		@ accumulate key material
+	add	r1,r1,r9
+	add	r2,r2,r10
+#ifdef	__thumb2__
+	itete	lo
+#endif
+	eorlo	r8,r8,r8		@ zero or ...
+	ldrhsb	r8,[r12],#16			@ ... load input
+	eorlo	r9,r9,r9
+	ldrhsb	r9,[r12,#-12]
+
+	add	r3,r3,r11
+#ifdef	__thumb2__
+	itete	lo
+#endif
+	eorlo	r10,r10,r10
+	ldrhsb	r10,[r12,#-8]
+	eorlo	r11,r11,r11
+	ldrhsb	r11,[r12,#-4]
+
+	eor	r0,r8,r0		@ xor with input (or zero)
+	eor	r1,r9,r1
+#ifdef	__thumb2__
+	itt	hs
+#endif
+	ldrhsb	r8,[r12,#-15]		@ load more input
+	ldrhsb	r9,[r12,#-11]
+	eor	r2,r10,r2
+	 strb	r0,[r14],#16		@ store output
+	eor	r3,r11,r3
+#ifdef	__thumb2__
+	itt	hs
+#endif
+	ldrhsb	r10,[r12,#-7]
+	ldrhsb	r11,[r12,#-3]
+	 strb	r1,[r14,#-12]
+	eor	r0,r8,r0,lsr#8
+	 strb	r2,[r14,#-8]
+	eor	r1,r9,r1,lsr#8
+#ifdef	__thumb2__
+	itt	hs
+#endif
+	ldrhsb	r8,[r12,#-14]		@ load more input
+	ldrhsb	r9,[r12,#-10]
+	 strb	r3,[r14,#-4]
+	eor	r2,r10,r2,lsr#8
+	 strb	r0,[r14,#-15]
+	eor	r3,r11,r3,lsr#8
+#ifdef	__thumb2__
+	itt	hs
+#endif
+	ldrhsb	r10,[r12,#-6]
+	ldrhsb	r11,[r12,#-2]
+	 strb	r1,[r14,#-11]
+	eor	r0,r8,r0,lsr#8
+	 strb	r2,[r14,#-7]
+	eor	r1,r9,r1,lsr#8
+#ifdef	__thumb2__
+	itt	hs
+#endif
+	ldrhsb	r8,[r12,#-13]		@ load more input
+	ldrhsb	r9,[r12,#-9]
+	 strb	r3,[r14,#-3]
+	eor	r2,r10,r2,lsr#8
+	 strb	r0,[r14,#-14]
+	eor	r3,r11,r3,lsr#8
+#ifdef	__thumb2__
+	itt	hs
+#endif
+	ldrhsb	r10,[r12,#-5]
+	ldrhsb	r11,[r12,#-1]
+	 strb	r1,[r14,#-10]
+	 strb	r2,[r14,#-6]
+	eor	r0,r8,r0,lsr#8
+	 strb	r3,[r14,#-2]
+	eor	r1,r9,r1,lsr#8
+	 strb	r0,[r14,#-13]
+	eor	r2,r10,r2,lsr#8
+	 strb	r1,[r14,#-9]
+	eor	r3,r11,r3,lsr#8
+	 strb	r2,[r14,#-5]
+	 strb	r3,[r14,#-1]
+	add	r8,sp,#4*(4+8)
+	ldmia	r8,{r8-r11}		@ load key material
+	add	r4,r4,r8		@ accumulate key material
+#ifdef	__thumb2__
+	itt	hi
+#endif
+	addhi	r8,r8,#1			@ next counter value
+	strhi	r8,[sp,#4*(12)]		@ save next counter value
+	add	r5,r5,r9
+	add	r6,r6,r10
+#ifdef	__thumb2__
+	itete	lo
+#endif
+	eorlo	r8,r8,r8		@ zero or ...
+	ldrhsb	r8,[r12],#16			@ ... load input
+	eorlo	r9,r9,r9
+	ldrhsb	r9,[r12,#-12]
+
+	add	r7,r7,r11
+#ifdef	__thumb2__
+	itete	lo
+#endif
+	eorlo	r10,r10,r10
+	ldrhsb	r10,[r12,#-8]
+	eorlo	r11,r11,r11
+	ldrhsb	r11,[r12,#-4]
+
+	eor	r4,r8,r4		@ xor with input (or zero)
+	eor	r5,r9,r5
+#ifdef	__thumb2__
+	itt	hs
+#endif
+	ldrhsb	r8,[r12,#-15]		@ load more input
+	ldrhsb	r9,[r12,#-11]
+	eor	r6,r10,r6
+	 strb	r4,[r14],#16		@ store output
+	eor	r7,r11,r7
+#ifdef	__thumb2__
+	itt	hs
+#endif
+	ldrhsb	r10,[r12,#-7]
+	ldrhsb	r11,[r12,#-3]
+	 strb	r5,[r14,#-12]
+	eor	r4,r8,r4,lsr#8
+	 strb	r6,[r14,#-8]
+	eor	r5,r9,r5,lsr#8
+#ifdef	__thumb2__
+	itt	hs
+#endif
+	ldrhsb	r8,[r12,#-14]		@ load more input
+	ldrhsb	r9,[r12,#-10]
+	 strb	r7,[r14,#-4]
+	eor	r6,r10,r6,lsr#8
+	 strb	r4,[r14,#-15]
+	eor	r7,r11,r7,lsr#8
+#ifdef	__thumb2__
+	itt	hs
+#endif
+	ldrhsb	r10,[r12,#-6]
+	ldrhsb	r11,[r12,#-2]
+	 strb	r5,[r14,#-11]
+	eor	r4,r8,r4,lsr#8
+	 strb	r6,[r14,#-7]
+	eor	r5,r9,r5,lsr#8
+#ifdef	__thumb2__
+	itt	hs
+#endif
+	ldrhsb	r8,[r12,#-13]		@ load more input
+	ldrhsb	r9,[r12,#-9]
+	 strb	r7,[r14,#-3]
+	eor	r6,r10,r6,lsr#8
+	 strb	r4,[r14,#-14]
+	eor	r7,r11,r7,lsr#8
+#ifdef	__thumb2__
+	itt	hs
+#endif
+	ldrhsb	r10,[r12,#-5]
+	ldrhsb	r11,[r12,#-1]
+	 strb	r5,[r14,#-10]
+	 strb	r6,[r14,#-6]
+	eor	r4,r8,r4,lsr#8
+	 strb	r7,[r14,#-2]
+	eor	r5,r9,r5,lsr#8
+	 strb	r4,[r14,#-13]
+	eor	r6,r10,r6,lsr#8
+	 strb	r5,[r14,#-9]
+	eor	r7,r11,r7,lsr#8
+	 strb	r6,[r14,#-5]
+	 strb	r7,[r14,#-1]
+#ifdef	__thumb2__
+	it	ne
+#endif
+	ldrne	r8,[sp,#4*(32+2)]		@ re-load len
+#ifdef	__thumb2__
+	it	hs
+#endif
+	subhs	r11,r8,#64			@ len-=64
+	bhi	.Loop_outer
+
+	beq	.Ldone
+#endif
+
+.Ltail:
+	ldr	r12,[sp,#4*(32+1)]	@ load inp
+	add	r9,sp,#4*(0)
+	ldr	r14,[sp,#4*(32+0)]	@ load out
+
+.Loop_tail:
+	ldrb	r10,[r9],#1	@ read buffer on stack
+	ldrb	r11,[r12],#1		@ read input
+	subs	r8,r8,#1
+	eor	r11,r11,r10
+	strb	r11,[r14],#1		@ store output
+	bne	.Loop_tail
+
+.Ldone:
+	add	sp,sp,#4*(32+3)
+.Lno_data_arm:
+	ldmia	sp!,{r4-r11,pc}
+ENDPROC(chacha20_arm)
diff --git b/net/wireguard/crypto/chacha20-arm64.S b/net/wireguard/crypto/chacha20-arm64.S
new file mode 100644
index 0000000..8acef8e
--- /dev/null
+++ b/net/wireguard/crypto/chacha20-arm64.S
@@ -0,0 +1,1940 @@
+/* SPDX-License-Identifier: OpenSSL OR (BSD-3-Clause OR GPL-2.0)
+ *
+ * Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ * Copyright 2016 The OpenSSL Project Authors. All Rights Reserved.
+ */
+
+#include <linux/linkage.h>
+
+.text
+.align	5
+.Lsigma:
+.quad	0x3320646e61707865,0x6b20657479622d32		// endian-neutral
+.Lone:
+.long	1,0,0,0
+
+.align	5
+ENTRY(chacha20_arm)
+	cbz	x2,.Labort
+.Lshort:
+	stp	x29,x30,[sp,#-96]!
+	add	x29,sp,#0
+
+	adr	x5,.Lsigma
+	stp	x19,x20,[sp,#16]
+	stp	x21,x22,[sp,#32]
+	stp	x23,x24,[sp,#48]
+	stp	x25,x26,[sp,#64]
+	stp	x27,x28,[sp,#80]
+	sub	sp,sp,#64
+
+	ldp	x22,x23,[x5]		// load sigma
+	ldp	x24,x25,[x3]		// load key
+	ldp	x26,x27,[x3,#16]
+	ldp	x28,x30,[x4]		// load counter
+#ifdef	__ARMEB__
+	ror	x24,x24,#32
+	ror	x25,x25,#32
+	ror	x26,x26,#32
+	ror	x27,x27,#32
+	ror	x28,x28,#32
+	ror	x30,x30,#32
+#endif
+
+.Loop_outer:
+	mov	w5,w22			// unpack key block
+	lsr	x6,x22,#32
+	mov	w7,w23
+	lsr	x8,x23,#32
+	mov	w9,w24
+	lsr	x10,x24,#32
+	mov	w11,w25
+	lsr	x12,x25,#32
+	mov	w13,w26
+	lsr	x14,x26,#32
+	mov	w15,w27
+	lsr	x16,x27,#32
+	mov	w17,w28
+	lsr	x19,x28,#32
+	mov	w20,w30
+	lsr	x21,x30,#32
+
+	mov	x4,#10
+	subs	x2,x2,#64
+.Loop:
+	sub	x4,x4,#1
+	add	w5,w5,w9
+	add	w6,w6,w10
+	add	w7,w7,w11
+	add	w8,w8,w12
+	eor	w17,w17,w5
+	eor	w19,w19,w6
+	eor	w20,w20,w7
+	eor	w21,w21,w8
+	ror	w17,w17,#16
+	ror	w19,w19,#16
+	ror	w20,w20,#16
+	ror	w21,w21,#16
+	add	w13,w13,w17
+	add	w14,w14,w19
+	add	w15,w15,w20
+	add	w16,w16,w21
+	eor	w9,w9,w13
+	eor	w10,w10,w14
+	eor	w11,w11,w15
+	eor	w12,w12,w16
+	ror	w9,w9,#20
+	ror	w10,w10,#20
+	ror	w11,w11,#20
+	ror	w12,w12,#20
+	add	w5,w5,w9
+	add	w6,w6,w10
+	add	w7,w7,w11
+	add	w8,w8,w12
+	eor	w17,w17,w5
+	eor	w19,w19,w6
+	eor	w20,w20,w7
+	eor	w21,w21,w8
+	ror	w17,w17,#24
+	ror	w19,w19,#24
+	ror	w20,w20,#24
+	ror	w21,w21,#24
+	add	w13,w13,w17
+	add	w14,w14,w19
+	add	w15,w15,w20
+	add	w16,w16,w21
+	eor	w9,w9,w13
+	eor	w10,w10,w14
+	eor	w11,w11,w15
+	eor	w12,w12,w16
+	ror	w9,w9,#25
+	ror	w10,w10,#25
+	ror	w11,w11,#25
+	ror	w12,w12,#25
+	add	w5,w5,w10
+	add	w6,w6,w11
+	add	w7,w7,w12
+	add	w8,w8,w9
+	eor	w21,w21,w5
+	eor	w17,w17,w6
+	eor	w19,w19,w7
+	eor	w20,w20,w8
+	ror	w21,w21,#16
+	ror	w17,w17,#16
+	ror	w19,w19,#16
+	ror	w20,w20,#16
+	add	w15,w15,w21
+	add	w16,w16,w17
+	add	w13,w13,w19
+	add	w14,w14,w20
+	eor	w10,w10,w15
+	eor	w11,w11,w16
+	eor	w12,w12,w13
+	eor	w9,w9,w14
+	ror	w10,w10,#20
+	ror	w11,w11,#20
+	ror	w12,w12,#20
+	ror	w9,w9,#20
+	add	w5,w5,w10
+	add	w6,w6,w11
+	add	w7,w7,w12
+	add	w8,w8,w9
+	eor	w21,w21,w5
+	eor	w17,w17,w6
+	eor	w19,w19,w7
+	eor	w20,w20,w8
+	ror	w21,w21,#24
+	ror	w17,w17,#24
+	ror	w19,w19,#24
+	ror	w20,w20,#24
+	add	w15,w15,w21
+	add	w16,w16,w17
+	add	w13,w13,w19
+	add	w14,w14,w20
+	eor	w10,w10,w15
+	eor	w11,w11,w16
+	eor	w12,w12,w13
+	eor	w9,w9,w14
+	ror	w10,w10,#25
+	ror	w11,w11,#25
+	ror	w12,w12,#25
+	ror	w9,w9,#25
+	cbnz	x4,.Loop
+
+	add	w5,w5,w22		// accumulate key block
+	add	x6,x6,x22,lsr#32
+	add	w7,w7,w23
+	add	x8,x8,x23,lsr#32
+	add	w9,w9,w24
+	add	x10,x10,x24,lsr#32
+	add	w11,w11,w25
+	add	x12,x12,x25,lsr#32
+	add	w13,w13,w26
+	add	x14,x14,x26,lsr#32
+	add	w15,w15,w27
+	add	x16,x16,x27,lsr#32
+	add	w17,w17,w28
+	add	x19,x19,x28,lsr#32
+	add	w20,w20,w30
+	add	x21,x21,x30,lsr#32
+
+	b.lo	.Ltail
+
+	add	x5,x5,x6,lsl#32	// pack
+	add	x7,x7,x8,lsl#32
+	ldp	x6,x8,[x1,#0]		// load input
+	add	x9,x9,x10,lsl#32
+	add	x11,x11,x12,lsl#32
+	ldp	x10,x12,[x1,#16]
+	add	x13,x13,x14,lsl#32
+	add	x15,x15,x16,lsl#32
+	ldp	x14,x16,[x1,#32]
+	add	x17,x17,x19,lsl#32
+	add	x20,x20,x21,lsl#32
+	ldp	x19,x21,[x1,#48]
+	add	x1,x1,#64
+#ifdef	__ARMEB__
+	rev	x5,x5
+	rev	x7,x7
+	rev	x9,x9
+	rev	x11,x11
+	rev	x13,x13
+	rev	x15,x15
+	rev	x17,x17
+	rev	x20,x20
+#endif
+	eor	x5,x5,x6
+	eor	x7,x7,x8
+	eor	x9,x9,x10
+	eor	x11,x11,x12
+	eor	x13,x13,x14
+	eor	x15,x15,x16
+	eor	x17,x17,x19
+	eor	x20,x20,x21
+
+	stp	x5,x7,[x0,#0]		// store output
+	add	x28,x28,#1			// increment counter
+	stp	x9,x11,[x0,#16]
+	stp	x13,x15,[x0,#32]
+	stp	x17,x20,[x0,#48]
+	add	x0,x0,#64
+
+	b.hi	.Loop_outer
+
+	ldp	x19,x20,[x29,#16]
+	add	sp,sp,#64
+	ldp	x21,x22,[x29,#32]
+	ldp	x23,x24,[x29,#48]
+	ldp	x25,x26,[x29,#64]
+	ldp	x27,x28,[x29,#80]
+	ldp	x29,x30,[sp],#96
+.Labort:
+	ret
+
+.align	4
+.Ltail:
+	add	x2,x2,#64
+.Less_than_64:
+	sub	x0,x0,#1
+	add	x1,x1,x2
+	add	x0,x0,x2
+	add	x4,sp,x2
+	neg	x2,x2
+
+	add	x5,x5,x6,lsl#32	// pack
+	add	x7,x7,x8,lsl#32
+	add	x9,x9,x10,lsl#32
+	add	x11,x11,x12,lsl#32
+	add	x13,x13,x14,lsl#32
+	add	x15,x15,x16,lsl#32
+	add	x17,x17,x19,lsl#32
+	add	x20,x20,x21,lsl#32
+#ifdef	__ARMEB__
+	rev	x5,x5
+	rev	x7,x7
+	rev	x9,x9
+	rev	x11,x11
+	rev	x13,x13
+	rev	x15,x15
+	rev	x17,x17
+	rev	x20,x20
+#endif
+	stp	x5,x7,[sp,#0]
+	stp	x9,x11,[sp,#16]
+	stp	x13,x15,[sp,#32]
+	stp	x17,x20,[sp,#48]
+
+.Loop_tail:
+	ldrb	w10,[x1,x2]
+	ldrb	w11,[x4,x2]
+	add	x2,x2,#1
+	eor	w10,w10,w11
+	strb	w10,[x0,x2]
+	cbnz	x2,.Loop_tail
+
+	stp	xzr,xzr,[sp,#0]
+	stp	xzr,xzr,[sp,#16]
+	stp	xzr,xzr,[sp,#32]
+	stp	xzr,xzr,[sp,#48]
+
+	ldp	x19,x20,[x29,#16]
+	add	sp,sp,#64
+	ldp	x21,x22,[x29,#32]
+	ldp	x23,x24,[x29,#48]
+	ldp	x25,x26,[x29,#64]
+	ldp	x27,x28,[x29,#80]
+	ldp	x29,x30,[sp],#96
+	ret
+ENDPROC(chacha20_arm)
+
+.align	5
+ENTRY(chacha20_neon)
+	cbz	x2,.Labort_neon
+	cmp	x2,#192
+	b.lo	.Lshort
+
+	stp	x29,x30,[sp,#-96]!
+	add	x29,sp,#0
+
+	adr	x5,.Lsigma
+	stp	x19,x20,[sp,#16]
+	stp	x21,x22,[sp,#32]
+	stp	x23,x24,[sp,#48]
+	stp	x25,x26,[sp,#64]
+	stp	x27,x28,[sp,#80]
+	cmp	x2,#512
+	b.hs	.L512_or_more_neon
+
+	sub	sp,sp,#64
+
+	ldp	x22,x23,[x5]		// load sigma
+	ld1	{v24.4s},[x5],#16
+	ldp	x24,x25,[x3]		// load key
+	ldp	x26,x27,[x3,#16]
+	ld1	{v25.4s,v26.4s},[x3]
+	ldp	x28,x30,[x4]		// load counter
+	ld1	{v27.4s},[x4]
+	ld1	{v31.4s},[x5]
+#ifdef	__ARMEB__
+	rev64	v24.4s,v24.4s
+	ror	x24,x24,#32
+	ror	x25,x25,#32
+	ror	x26,x26,#32
+	ror	x27,x27,#32
+	ror	x28,x28,#32
+	ror	x30,x30,#32
+#endif
+	add	v27.4s,v27.4s,v31.4s		// += 1
+	add	v28.4s,v27.4s,v31.4s
+	add	v29.4s,v28.4s,v31.4s
+	shl	v31.4s,v31.4s,#2			// 1 -> 4
+
+.Loop_outer_neon:
+	mov	w5,w22			// unpack key block
+	lsr	x6,x22,#32
+	mov	v0.16b,v24.16b
+	mov	w7,w23
+	lsr	x8,x23,#32
+	mov	v4.16b,v24.16b
+	mov	w9,w24
+	lsr	x10,x24,#32
+	mov	v16.16b,v24.16b
+	mov	w11,w25
+	mov	v1.16b,v25.16b
+	lsr	x12,x25,#32
+	mov	v5.16b,v25.16b
+	mov	w13,w26
+	mov	v17.16b,v25.16b
+	lsr	x14,x26,#32
+	mov	v3.16b,v27.16b
+	mov	w15,w27
+	mov	v7.16b,v28.16b
+	lsr	x16,x27,#32
+	mov	v19.16b,v29.16b
+	mov	w17,w28
+	mov	v2.16b,v26.16b
+	lsr	x19,x28,#32
+	mov	v6.16b,v26.16b
+	mov	w20,w30
+	mov	v18.16b,v26.16b
+	lsr	x21,x30,#32
+
+	mov	x4,#10
+	subs	x2,x2,#256
+.Loop_neon:
+	sub	x4,x4,#1
+	add	v0.4s,v0.4s,v1.4s
+	add	w5,w5,w9
+	add	v4.4s,v4.4s,v5.4s
+	add	w6,w6,w10
+	add	v16.4s,v16.4s,v17.4s
+	add	w7,w7,w11
+	eor	v3.16b,v3.16b,v0.16b
+	add	w8,w8,w12
+	eor	v7.16b,v7.16b,v4.16b
+	eor	w17,w17,w5
+	eor	v19.16b,v19.16b,v16.16b
+	eor	w19,w19,w6
+	rev32	v3.8h,v3.8h
+	eor	w20,w20,w7
+	rev32	v7.8h,v7.8h
+	eor	w21,w21,w8
+	rev32	v19.8h,v19.8h
+	ror	w17,w17,#16
+	add	v2.4s,v2.4s,v3.4s
+	ror	w19,w19,#16
+	add	v6.4s,v6.4s,v7.4s
+	ror	w20,w20,#16
+	add	v18.4s,v18.4s,v19.4s
+	ror	w21,w21,#16
+	eor	v20.16b,v1.16b,v2.16b
+	add	w13,w13,w17
+	eor	v21.16b,v5.16b,v6.16b
+	add	w14,w14,w19
+	eor	v22.16b,v17.16b,v18.16b
+	add	w15,w15,w20
+	ushr	v1.4s,v20.4s,#20
+	add	w16,w16,w21
+	ushr	v5.4s,v21.4s,#20
+	eor	w9,w9,w13
+	ushr	v17.4s,v22.4s,#20
+	eor	w10,w10,w14
+	sli	v1.4s,v20.4s,#12
+	eor	w11,w11,w15
+	sli	v5.4s,v21.4s,#12
+	eor	w12,w12,w16
+	sli	v17.4s,v22.4s,#12
+	ror	w9,w9,#20
+	add	v0.4s,v0.4s,v1.4s
+	ror	w10,w10,#20
+	add	v4.4s,v4.4s,v5.4s
+	ror	w11,w11,#20
+	add	v16.4s,v16.4s,v17.4s
+	ror	w12,w12,#20
+	eor	v20.16b,v3.16b,v0.16b
+	add	w5,w5,w9
+	eor	v21.16b,v7.16b,v4.16b
+	add	w6,w6,w10
+	eor	v22.16b,v19.16b,v16.16b
+	add	w7,w7,w11
+	ushr	v3.4s,v20.4s,#24
+	add	w8,w8,w12
+	ushr	v7.4s,v21.4s,#24
+	eor	w17,w17,w5
+	ushr	v19.4s,v22.4s,#24
+	eor	w19,w19,w6
+	sli	v3.4s,v20.4s,#8
+	eor	w20,w20,w7
+	sli	v7.4s,v21.4s,#8
+	eor	w21,w21,w8
+	sli	v19.4s,v22.4s,#8
+	ror	w17,w17,#24
+	add	v2.4s,v2.4s,v3.4s
+	ror	w19,w19,#24
+	add	v6.4s,v6.4s,v7.4s
+	ror	w20,w20,#24
+	add	v18.4s,v18.4s,v19.4s
+	ror	w21,w21,#24
+	eor	v20.16b,v1.16b,v2.16b
+	add	w13,w13,w17
+	eor	v21.16b,v5.16b,v6.16b
+	add	w14,w14,w19
+	eor	v22.16b,v17.16b,v18.16b
+	add	w15,w15,w20
+	ushr	v1.4s,v20.4s,#25
+	add	w16,w16,w21
+	ushr	v5.4s,v21.4s,#25
+	eor	w9,w9,w13
+	ushr	v17.4s,v22.4s,#25
+	eor	w10,w10,w14
+	sli	v1.4s,v20.4s,#7
+	eor	w11,w11,w15
+	sli	v5.4s,v21.4s,#7
+	eor	w12,w12,w16
+	sli	v17.4s,v22.4s,#7
+	ror	w9,w9,#25
+	ext	v2.16b,v2.16b,v2.16b,#8
+	ror	w10,w10,#25
+	ext	v6.16b,v6.16b,v6.16b,#8
+	ror	w11,w11,#25
+	ext	v18.16b,v18.16b,v18.16b,#8
+	ror	w12,w12,#25
+	ext	v3.16b,v3.16b,v3.16b,#12
+	ext	v7.16b,v7.16b,v7.16b,#12
+	ext	v19.16b,v19.16b,v19.16b,#12
+	ext	v1.16b,v1.16b,v1.16b,#4
+	ext	v5.16b,v5.16b,v5.16b,#4
+	ext	v17.16b,v17.16b,v17.16b,#4
+	add	v0.4s,v0.4s,v1.4s
+	add	w5,w5,w10
+	add	v4.4s,v4.4s,v5.4s
+	add	w6,w6,w11
+	add	v16.4s,v16.4s,v17.4s
+	add	w7,w7,w12
+	eor	v3.16b,v3.16b,v0.16b
+	add	w8,w8,w9
+	eor	v7.16b,v7.16b,v4.16b
+	eor	w21,w21,w5
+	eor	v19.16b,v19.16b,v16.16b
+	eor	w17,w17,w6
+	rev32	v3.8h,v3.8h
+	eor	w19,w19,w7
+	rev32	v7.8h,v7.8h
+	eor	w20,w20,w8
+	rev32	v19.8h,v19.8h
+	ror	w21,w21,#16
+	add	v2.4s,v2.4s,v3.4s
+	ror	w17,w17,#16
+	add	v6.4s,v6.4s,v7.4s
+	ror	w19,w19,#16
+	add	v18.4s,v18.4s,v19.4s
+	ror	w20,w20,#16
+	eor	v20.16b,v1.16b,v2.16b
+	add	w15,w15,w21
+	eor	v21.16b,v5.16b,v6.16b
+	add	w16,w16,w17
+	eor	v22.16b,v17.16b,v18.16b
+	add	w13,w13,w19
+	ushr	v1.4s,v20.4s,#20
+	add	w14,w14,w20
+	ushr	v5.4s,v21.4s,#20
+	eor	w10,w10,w15
+	ushr	v17.4s,v22.4s,#20
+	eor	w11,w11,w16
+	sli	v1.4s,v20.4s,#12
+	eor	w12,w12,w13
+	sli	v5.4s,v21.4s,#12
+	eor	w9,w9,w14
+	sli	v17.4s,v22.4s,#12
+	ror	w10,w10,#20
+	add	v0.4s,v0.4s,v1.4s
+	ror	w11,w11,#20
+	add	v4.4s,v4.4s,v5.4s
+	ror	w12,w12,#20
+	add	v16.4s,v16.4s,v17.4s
+	ror	w9,w9,#20
+	eor	v20.16b,v3.16b,v0.16b
+	add	w5,w5,w10
+	eor	v21.16b,v7.16b,v4.16b
+	add	w6,w6,w11
+	eor	v22.16b,v19.16b,v16.16b
+	add	w7,w7,w12
+	ushr	v3.4s,v20.4s,#24
+	add	w8,w8,w9
+	ushr	v7.4s,v21.4s,#24
+	eor	w21,w21,w5
+	ushr	v19.4s,v22.4s,#24
+	eor	w17,w17,w6
+	sli	v3.4s,v20.4s,#8
+	eor	w19,w19,w7
+	sli	v7.4s,v21.4s,#8
+	eor	w20,w20,w8
+	sli	v19.4s,v22.4s,#8
+	ror	w21,w21,#24
+	add	v2.4s,v2.4s,v3.4s
+	ror	w17,w17,#24
+	add	v6.4s,v6.4s,v7.4s
+	ror	w19,w19,#24
+	add	v18.4s,v18.4s,v19.4s
+	ror	w20,w20,#24
+	eor	v20.16b,v1.16b,v2.16b
+	add	w15,w15,w21
+	eor	v21.16b,v5.16b,v6.16b
+	add	w16,w16,w17
+	eor	v22.16b,v17.16b,v18.16b
+	add	w13,w13,w19
+	ushr	v1.4s,v20.4s,#25
+	add	w14,w14,w20
+	ushr	v5.4s,v21.4s,#25
+	eor	w10,w10,w15
+	ushr	v17.4s,v22.4s,#25
+	eor	w11,w11,w16
+	sli	v1.4s,v20.4s,#7
+	eor	w12,w12,w13
+	sli	v5.4s,v21.4s,#7
+	eor	w9,w9,w14
+	sli	v17.4s,v22.4s,#7
+	ror	w10,w10,#25
+	ext	v2.16b,v2.16b,v2.16b,#8
+	ror	w11,w11,#25
+	ext	v6.16b,v6.16b,v6.16b,#8
+	ror	w12,w12,#25
+	ext	v18.16b,v18.16b,v18.16b,#8
+	ror	w9,w9,#25
+	ext	v3.16b,v3.16b,v3.16b,#4
+	ext	v7.16b,v7.16b,v7.16b,#4
+	ext	v19.16b,v19.16b,v19.16b,#4
+	ext	v1.16b,v1.16b,v1.16b,#12
+	ext	v5.16b,v5.16b,v5.16b,#12
+	ext	v17.16b,v17.16b,v17.16b,#12
+	cbnz	x4,.Loop_neon
+
+	add	w5,w5,w22		// accumulate key block
+	add	v0.4s,v0.4s,v24.4s
+	add	x6,x6,x22,lsr#32
+	add	v4.4s,v4.4s,v24.4s
+	add	w7,w7,w23
+	add	v16.4s,v16.4s,v24.4s
+	add	x8,x8,x23,lsr#32
+	add	v2.4s,v2.4s,v26.4s
+	add	w9,w9,w24
+	add	v6.4s,v6.4s,v26.4s
+	add	x10,x10,x24,lsr#32
+	add	v18.4s,v18.4s,v26.4s
+	add	w11,w11,w25
+	add	v3.4s,v3.4s,v27.4s
+	add	x12,x12,x25,lsr#32
+	add	w13,w13,w26
+	add	v7.4s,v7.4s,v28.4s
+	add	x14,x14,x26,lsr#32
+	add	w15,w15,w27
+	add	v19.4s,v19.4s,v29.4s
+	add	x16,x16,x27,lsr#32
+	add	w17,w17,w28
+	add	v1.4s,v1.4s,v25.4s
+	add	x19,x19,x28,lsr#32
+	add	w20,w20,w30
+	add	v5.4s,v5.4s,v25.4s
+	add	x21,x21,x30,lsr#32
+	add	v17.4s,v17.4s,v25.4s
+
+	b.lo	.Ltail_neon
+
+	add	x5,x5,x6,lsl#32	// pack
+	add	x7,x7,x8,lsl#32
+	ldp	x6,x8,[x1,#0]		// load input
+	add	x9,x9,x10,lsl#32
+	add	x11,x11,x12,lsl#32
+	ldp	x10,x12,[x1,#16]
+	add	x13,x13,x14,lsl#32
+	add	x15,x15,x16,lsl#32
+	ldp	x14,x16,[x1,#32]
+	add	x17,x17,x19,lsl#32
+	add	x20,x20,x21,lsl#32
+	ldp	x19,x21,[x1,#48]
+	add	x1,x1,#64
+#ifdef	__ARMEB__
+	rev	x5,x5
+	rev	x7,x7
+	rev	x9,x9
+	rev	x11,x11
+	rev	x13,x13
+	rev	x15,x15
+	rev	x17,x17
+	rev	x20,x20
+#endif
+	ld1	{v20.16b,v21.16b,v22.16b,v23.16b},[x1],#64
+	eor	x5,x5,x6
+	eor	x7,x7,x8
+	eor	x9,x9,x10
+	eor	x11,x11,x12
+	eor	x13,x13,x14
+	eor	v0.16b,v0.16b,v20.16b
+	eor	x15,x15,x16
+	eor	v1.16b,v1.16b,v21.16b
+	eor	x17,x17,x19
+	eor	v2.16b,v2.16b,v22.16b
+	eor	x20,x20,x21
+	eor	v3.16b,v3.16b,v23.16b
+	ld1	{v20.16b,v21.16b,v22.16b,v23.16b},[x1],#64
+
+	stp	x5,x7,[x0,#0]		// store output
+	add	x28,x28,#4			// increment counter
+	stp	x9,x11,[x0,#16]
+	add	v27.4s,v27.4s,v31.4s		// += 4
+	stp	x13,x15,[x0,#32]
+	add	v28.4s,v28.4s,v31.4s
+	stp	x17,x20,[x0,#48]
+	add	v29.4s,v29.4s,v31.4s
+	add	x0,x0,#64
+
+	st1	{v0.16b,v1.16b,v2.16b,v3.16b},[x0],#64
+	ld1	{v0.16b,v1.16b,v2.16b,v3.16b},[x1],#64
+
+	eor	v4.16b,v4.16b,v20.16b
+	eor	v5.16b,v5.16b,v21.16b
+	eor	v6.16b,v6.16b,v22.16b
+	eor	v7.16b,v7.16b,v23.16b
+	st1	{v4.16b,v5.16b,v6.16b,v7.16b},[x0],#64
+
+	eor	v16.16b,v16.16b,v0.16b
+	eor	v17.16b,v17.16b,v1.16b
+	eor	v18.16b,v18.16b,v2.16b
+	eor	v19.16b,v19.16b,v3.16b
+	st1	{v16.16b,v17.16b,v18.16b,v19.16b},[x0],#64
+
+	b.hi	.Loop_outer_neon
+
+	ldp	x19,x20,[x29,#16]
+	add	sp,sp,#64
+	ldp	x21,x22,[x29,#32]
+	ldp	x23,x24,[x29,#48]
+	ldp	x25,x26,[x29,#64]
+	ldp	x27,x28,[x29,#80]
+	ldp	x29,x30,[sp],#96
+	ret
+
+.Ltail_neon:
+	add	x2,x2,#256
+	cmp	x2,#64
+	b.lo	.Less_than_64
+
+	add	x5,x5,x6,lsl#32	// pack
+	add	x7,x7,x8,lsl#32
+	ldp	x6,x8,[x1,#0]		// load input
+	add	x9,x9,x10,lsl#32
+	add	x11,x11,x12,lsl#32
+	ldp	x10,x12,[x1,#16]
+	add	x13,x13,x14,lsl#32
+	add	x15,x15,x16,lsl#32
+	ldp	x14,x16,[x1,#32]
+	add	x17,x17,x19,lsl#32
+	add	x20,x20,x21,lsl#32
+	ldp	x19,x21,[x1,#48]
+	add	x1,x1,#64
+#ifdef	__ARMEB__
+	rev	x5,x5
+	rev	x7,x7
+	rev	x9,x9
+	rev	x11,x11
+	rev	x13,x13
+	rev	x15,x15
+	rev	x17,x17
+	rev	x20,x20
+#endif
+	eor	x5,x5,x6
+	eor	x7,x7,x8
+	eor	x9,x9,x10
+	eor	x11,x11,x12
+	eor	x13,x13,x14
+	eor	x15,x15,x16
+	eor	x17,x17,x19
+	eor	x20,x20,x21
+
+	stp	x5,x7,[x0,#0]		// store output
+	add	x28,x28,#4			// increment counter
+	stp	x9,x11,[x0,#16]
+	stp	x13,x15,[x0,#32]
+	stp	x17,x20,[x0,#48]
+	add	x0,x0,#64
+	b.eq	.Ldone_neon
+	sub	x2,x2,#64
+	cmp	x2,#64
+	b.lo	.Less_than_128
+
+	ld1	{v20.16b,v21.16b,v22.16b,v23.16b},[x1],#64
+	eor	v0.16b,v0.16b,v20.16b
+	eor	v1.16b,v1.16b,v21.16b
+	eor	v2.16b,v2.16b,v22.16b
+	eor	v3.16b,v3.16b,v23.16b
+	st1	{v0.16b,v1.16b,v2.16b,v3.16b},[x0],#64
+	b.eq	.Ldone_neon
+	sub	x2,x2,#64
+	cmp	x2,#64
+	b.lo	.Less_than_192
+
+	ld1	{v20.16b,v21.16b,v22.16b,v23.16b},[x1],#64
+	eor	v4.16b,v4.16b,v20.16b
+	eor	v5.16b,v5.16b,v21.16b
+	eor	v6.16b,v6.16b,v22.16b
+	eor	v7.16b,v7.16b,v23.16b
+	st1	{v4.16b,v5.16b,v6.16b,v7.16b},[x0],#64
+	b.eq	.Ldone_neon
+	sub	x2,x2,#64
+
+	st1	{v16.16b,v17.16b,v18.16b,v19.16b},[sp]
+	b	.Last_neon
+
+.Less_than_128:
+	st1	{v0.16b,v1.16b,v2.16b,v3.16b},[sp]
+	b	.Last_neon
+.Less_than_192:
+	st1	{v4.16b,v5.16b,v6.16b,v7.16b},[sp]
+	b	.Last_neon
+
+.align	4
+.Last_neon:
+	sub	x0,x0,#1
+	add	x1,x1,x2
+	add	x0,x0,x2
+	add	x4,sp,x2
+	neg	x2,x2
+
+.Loop_tail_neon:
+	ldrb	w10,[x1,x2]
+	ldrb	w11,[x4,x2]
+	add	x2,x2,#1
+	eor	w10,w10,w11
+	strb	w10,[x0,x2]
+	cbnz	x2,.Loop_tail_neon
+
+	stp	xzr,xzr,[sp,#0]
+	stp	xzr,xzr,[sp,#16]
+	stp	xzr,xzr,[sp,#32]
+	stp	xzr,xzr,[sp,#48]
+
+.Ldone_neon:
+	ldp	x19,x20,[x29,#16]
+	add	sp,sp,#64
+	ldp	x21,x22,[x29,#32]
+	ldp	x23,x24,[x29,#48]
+	ldp	x25,x26,[x29,#64]
+	ldp	x27,x28,[x29,#80]
+	ldp	x29,x30,[sp],#96
+	ret
+
+.L512_or_more_neon:
+	sub	sp,sp,#128+64
+
+	ldp	x22,x23,[x5]		// load sigma
+	ld1	{v24.4s},[x5],#16
+	ldp	x24,x25,[x3]		// load key
+	ldp	x26,x27,[x3,#16]
+	ld1	{v25.4s,v26.4s},[x3]
+	ldp	x28,x30,[x4]		// load counter
+	ld1	{v27.4s},[x4]
+	ld1	{v31.4s},[x5]
+#ifdef	__ARMEB__
+	rev64	v24.4s,v24.4s
+	ror	x24,x24,#32
+	ror	x25,x25,#32
+	ror	x26,x26,#32
+	ror	x27,x27,#32
+	ror	x28,x28,#32
+	ror	x30,x30,#32
+#endif
+	add	v27.4s,v27.4s,v31.4s		// += 1
+	stp	q24,q25,[sp,#0]		// off-load key block, invariant part
+	add	v27.4s,v27.4s,v31.4s		// not typo
+	str	q26,[sp,#32]
+	add	v28.4s,v27.4s,v31.4s
+	add	v29.4s,v28.4s,v31.4s
+	add	v30.4s,v29.4s,v31.4s
+	shl	v31.4s,v31.4s,#2			// 1 -> 4
+
+	stp	d8,d9,[sp,#128+0]		// meet ABI requirements
+	stp	d10,d11,[sp,#128+16]
+	stp	d12,d13,[sp,#128+32]
+	stp	d14,d15,[sp,#128+48]
+
+	sub	x2,x2,#512			// not typo
+
+.Loop_outer_512_neon:
+	mov	v0.16b,v24.16b
+	mov	v4.16b,v24.16b
+	mov	v8.16b,v24.16b
+	mov	v12.16b,v24.16b
+	mov	v16.16b,v24.16b
+	mov	v20.16b,v24.16b
+	mov	v1.16b,v25.16b
+	mov	w5,w22			// unpack key block
+	mov	v5.16b,v25.16b
+	lsr	x6,x22,#32
+	mov	v9.16b,v25.16b
+	mov	w7,w23
+	mov	v13.16b,v25.16b
+	lsr	x8,x23,#32
+	mov	v17.16b,v25.16b
+	mov	w9,w24
+	mov	v21.16b,v25.16b
+	lsr	x10,x24,#32
+	mov	v3.16b,v27.16b
+	mov	w11,w25
+	mov	v7.16b,v28.16b
+	lsr	x12,x25,#32
+	mov	v11.16b,v29.16b
+	mov	w13,w26
+	mov	v15.16b,v30.16b
+	lsr	x14,x26,#32
+	mov	v2.16b,v26.16b
+	mov	w15,w27
+	mov	v6.16b,v26.16b
+	lsr	x16,x27,#32
+	add	v19.4s,v3.4s,v31.4s			// +4
+	mov	w17,w28
+	add	v23.4s,v7.4s,v31.4s			// +4
+	lsr	x19,x28,#32
+	mov	v10.16b,v26.16b
+	mov	w20,w30
+	mov	v14.16b,v26.16b
+	lsr	x21,x30,#32
+	mov	v18.16b,v26.16b
+	stp	q27,q28,[sp,#48]		// off-load key block, variable part
+	mov	v22.16b,v26.16b
+	str	q29,[sp,#80]
+
+	mov	x4,#5
+	subs	x2,x2,#512
+.Loop_upper_neon:
+	sub	x4,x4,#1
+	add	v0.4s,v0.4s,v1.4s
+	add	w5,w5,w9
+	add	v4.4s,v4.4s,v5.4s
+	add	w6,w6,w10
+	add	v8.4s,v8.4s,v9.4s
+	add	w7,w7,w11
+	add	v12.4s,v12.4s,v13.4s
+	add	w8,w8,w12
+	add	v16.4s,v16.4s,v17.4s
+	eor	w17,w17,w5
+	add	v20.4s,v20.4s,v21.4s
+	eor	w19,w19,w6
+	eor	v3.16b,v3.16b,v0.16b
+	eor	w20,w20,w7
+	eor	v7.16b,v7.16b,v4.16b
+	eor	w21,w21,w8
+	eor	v11.16b,v11.16b,v8.16b
+	ror	w17,w17,#16
+	eor	v15.16b,v15.16b,v12.16b
+	ror	w19,w19,#16
+	eor	v19.16b,v19.16b,v16.16b
+	ror	w20,w20,#16
+	eor	v23.16b,v23.16b,v20.16b
+	ror	w21,w21,#16
+	rev32	v3.8h,v3.8h
+	add	w13,w13,w17
+	rev32	v7.8h,v7.8h
+	add	w14,w14,w19
+	rev32	v11.8h,v11.8h
+	add	w15,w15,w20
+	rev32	v15.8h,v15.8h
+	add	w16,w16,w21
+	rev32	v19.8h,v19.8h
+	eor	w9,w9,w13
+	rev32	v23.8h,v23.8h
+	eor	w10,w10,w14
+	add	v2.4s,v2.4s,v3.4s
+	eor	w11,w11,w15
+	add	v6.4s,v6.4s,v7.4s
+	eor	w12,w12,w16
+	add	v10.4s,v10.4s,v11.4s
+	ror	w9,w9,#20
+	add	v14.4s,v14.4s,v15.4s
+	ror	w10,w10,#20
+	add	v18.4s,v18.4s,v19.4s
+	ror	w11,w11,#20
+	add	v22.4s,v22.4s,v23.4s
+	ror	w12,w12,#20
+	eor	v24.16b,v1.16b,v2.16b
+	add	w5,w5,w9
+	eor	v25.16b,v5.16b,v6.16b
+	add	w6,w6,w10
+	eor	v26.16b,v9.16b,v10.16b
+	add	w7,w7,w11
+	eor	v27.16b,v13.16b,v14.16b
+	add	w8,w8,w12
+	eor	v28.16b,v17.16b,v18.16b
+	eor	w17,w17,w5
+	eor	v29.16b,v21.16b,v22.16b
+	eor	w19,w19,w6
+	ushr	v1.4s,v24.4s,#20
+	eor	w20,w20,w7
+	ushr	v5.4s,v25.4s,#20
+	eor	w21,w21,w8
+	ushr	v9.4s,v26.4s,#20
+	ror	w17,w17,#24
+	ushr	v13.4s,v27.4s,#20
+	ror	w19,w19,#24
+	ushr	v17.4s,v28.4s,#20
+	ror	w20,w20,#24
+	ushr	v21.4s,v29.4s,#20
+	ror	w21,w21,#24
+	sli	v1.4s,v24.4s,#12
+	add	w13,w13,w17
+	sli	v5.4s,v25.4s,#12
+	add	w14,w14,w19
+	sli	v9.4s,v26.4s,#12
+	add	w15,w15,w20
+	sli	v13.4s,v27.4s,#12
+	add	w16,w16,w21
+	sli	v17.4s,v28.4s,#12
+	eor	w9,w9,w13
+	sli	v21.4s,v29.4s,#12
+	eor	w10,w10,w14
+	add	v0.4s,v0.4s,v1.4s
+	eor	w11,w11,w15
+	add	v4.4s,v4.4s,v5.4s
+	eor	w12,w12,w16
+	add	v8.4s,v8.4s,v9.4s
+	ror	w9,w9,#25
+	add	v12.4s,v12.4s,v13.4s
+	ror	w10,w10,#25
+	add	v16.4s,v16.4s,v17.4s
+	ror	w11,w11,#25
+	add	v20.4s,v20.4s,v21.4s
+	ror	w12,w12,#25
+	eor	v24.16b,v3.16b,v0.16b
+	add	w5,w5,w10
+	eor	v25.16b,v7.16b,v4.16b
+	add	w6,w6,w11
+	eor	v26.16b,v11.16b,v8.16b
+	add	w7,w7,w12
+	eor	v27.16b,v15.16b,v12.16b
+	add	w8,w8,w9
+	eor	v28.16b,v19.16b,v16.16b
+	eor	w21,w21,w5
+	eor	v29.16b,v23.16b,v20.16b
+	eor	w17,w17,w6
+	ushr	v3.4s,v24.4s,#24
+	eor	w19,w19,w7
+	ushr	v7.4s,v25.4s,#24
+	eor	w20,w20,w8
+	ushr	v11.4s,v26.4s,#24
+	ror	w21,w21,#16
+	ushr	v15.4s,v27.4s,#24
+	ror	w17,w17,#16
+	ushr	v19.4s,v28.4s,#24
+	ror	w19,w19,#16
+	ushr	v23.4s,v29.4s,#24
+	ror	w20,w20,#16
+	sli	v3.4s,v24.4s,#8
+	add	w15,w15,w21
+	sli	v7.4s,v25.4s,#8
+	add	w16,w16,w17
+	sli	v11.4s,v26.4s,#8
+	add	w13,w13,w19
+	sli	v15.4s,v27.4s,#8
+	add	w14,w14,w20
+	sli	v19.4s,v28.4s,#8
+	eor	w10,w10,w15
+	sli	v23.4s,v29.4s,#8
+	eor	w11,w11,w16
+	add	v2.4s,v2.4s,v3.4s
+	eor	w12,w12,w13
+	add	v6.4s,v6.4s,v7.4s
+	eor	w9,w9,w14
+	add	v10.4s,v10.4s,v11.4s
+	ror	w10,w10,#20
+	add	v14.4s,v14.4s,v15.4s
+	ror	w11,w11,#20
+	add	v18.4s,v18.4s,v19.4s
+	ror	w12,w12,#20
+	add	v22.4s,v22.4s,v23.4s
+	ror	w9,w9,#20
+	eor	v24.16b,v1.16b,v2.16b
+	add	w5,w5,w10
+	eor	v25.16b,v5.16b,v6.16b
+	add	w6,w6,w11
+	eor	v26.16b,v9.16b,v10.16b
+	add	w7,w7,w12
+	eor	v27.16b,v13.16b,v14.16b
+	add	w8,w8,w9
+	eor	v28.16b,v17.16b,v18.16b
+	eor	w21,w21,w5
+	eor	v29.16b,v21.16b,v22.16b
+	eor	w17,w17,w6
+	ushr	v1.4s,v24.4s,#25
+	eor	w19,w19,w7
+	ushr	v5.4s,v25.4s,#25
+	eor	w20,w20,w8
+	ushr	v9.4s,v26.4s,#25
+	ror	w21,w21,#24
+	ushr	v13.4s,v27.4s,#25
+	ror	w17,w17,#24
+	ushr	v17.4s,v28.4s,#25
+	ror	w19,w19,#24
+	ushr	v21.4s,v29.4s,#25
+	ror	w20,w20,#24
+	sli	v1.4s,v24.4s,#7
+	add	w15,w15,w21
+	sli	v5.4s,v25.4s,#7
+	add	w16,w16,w17
+	sli	v9.4s,v26.4s,#7
+	add	w13,w13,w19
+	sli	v13.4s,v27.4s,#7
+	add	w14,w14,w20
+	sli	v17.4s,v28.4s,#7
+	eor	w10,w10,w15
+	sli	v21.4s,v29.4s,#7
+	eor	w11,w11,w16
+	ext	v2.16b,v2.16b,v2.16b,#8
+	eor	w12,w12,w13
+	ext	v6.16b,v6.16b,v6.16b,#8
+	eor	w9,w9,w14
+	ext	v10.16b,v10.16b,v10.16b,#8
+	ror	w10,w10,#25
+	ext	v14.16b,v14.16b,v14.16b,#8
+	ror	w11,w11,#25
+	ext	v18.16b,v18.16b,v18.16b,#8
+	ror	w12,w12,#25
+	ext	v22.16b,v22.16b,v22.16b,#8
+	ror	w9,w9,#25
+	ext	v3.16b,v3.16b,v3.16b,#12
+	ext	v7.16b,v7.16b,v7.16b,#12
+	ext	v11.16b,v11.16b,v11.16b,#12
+	ext	v15.16b,v15.16b,v15.16b,#12
+	ext	v19.16b,v19.16b,v19.16b,#12
+	ext	v23.16b,v23.16b,v23.16b,#12
+	ext	v1.16b,v1.16b,v1.16b,#4
+	ext	v5.16b,v5.16b,v5.16b,#4
+	ext	v9.16b,v9.16b,v9.16b,#4
+	ext	v13.16b,v13.16b,v13.16b,#4
+	ext	v17.16b,v17.16b,v17.16b,#4
+	ext	v21.16b,v21.16b,v21.16b,#4
+	add	v0.4s,v0.4s,v1.4s
+	add	w5,w5,w9
+	add	v4.4s,v4.4s,v5.4s
+	add	w6,w6,w10
+	add	v8.4s,v8.4s,v9.4s
+	add	w7,w7,w11
+	add	v12.4s,v12.4s,v13.4s
+	add	w8,w8,w12
+	add	v16.4s,v16.4s,v17.4s
+	eor	w17,w17,w5
+	add	v20.4s,v20.4s,v21.4s
+	eor	w19,w19,w6
+	eor	v3.16b,v3.16b,v0.16b
+	eor	w20,w20,w7
+	eor	v7.16b,v7.16b,v4.16b
+	eor	w21,w21,w8
+	eor	v11.16b,v11.16b,v8.16b
+	ror	w17,w17,#16
+	eor	v15.16b,v15.16b,v12.16b
+	ror	w19,w19,#16
+	eor	v19.16b,v19.16b,v16.16b
+	ror	w20,w20,#16
+	eor	v23.16b,v23.16b,v20.16b
+	ror	w21,w21,#16
+	rev32	v3.8h,v3.8h
+	add	w13,w13,w17
+	rev32	v7.8h,v7.8h
+	add	w14,w14,w19
+	rev32	v11.8h,v11.8h
+	add	w15,w15,w20
+	rev32	v15.8h,v15.8h
+	add	w16,w16,w21
+	rev32	v19.8h,v19.8h
+	eor	w9,w9,w13
+	rev32	v23.8h,v23.8h
+	eor	w10,w10,w14
+	add	v2.4s,v2.4s,v3.4s
+	eor	w11,w11,w15
+	add	v6.4s,v6.4s,v7.4s
+	eor	w12,w12,w16
+	add	v10.4s,v10.4s,v11.4s
+	ror	w9,w9,#20
+	add	v14.4s,v14.4s,v15.4s
+	ror	w10,w10,#20
+	add	v18.4s,v18.4s,v19.4s
+	ror	w11,w11,#20
+	add	v22.4s,v22.4s,v23.4s
+	ror	w12,w12,#20
+	eor	v24.16b,v1.16b,v2.16b
+	add	w5,w5,w9
+	eor	v25.16b,v5.16b,v6.16b
+	add	w6,w6,w10
+	eor	v26.16b,v9.16b,v10.16b
+	add	w7,w7,w11
+	eor	v27.16b,v13.16b,v14.16b
+	add	w8,w8,w12
+	eor	v28.16b,v17.16b,v18.16b
+	eor	w17,w17,w5
+	eor	v29.16b,v21.16b,v22.16b
+	eor	w19,w19,w6
+	ushr	v1.4s,v24.4s,#20
+	eor	w20,w20,w7
+	ushr	v5.4s,v25.4s,#20
+	eor	w21,w21,w8
+	ushr	v9.4s,v26.4s,#20
+	ror	w17,w17,#24
+	ushr	v13.4s,v27.4s,#20
+	ror	w19,w19,#24
+	ushr	v17.4s,v28.4s,#20
+	ror	w20,w20,#24
+	ushr	v21.4s,v29.4s,#20
+	ror	w21,w21,#24
+	sli	v1.4s,v24.4s,#12
+	add	w13,w13,w17
+	sli	v5.4s,v25.4s,#12
+	add	w14,w14,w19
+	sli	v9.4s,v26.4s,#12
+	add	w15,w15,w20
+	sli	v13.4s,v27.4s,#12
+	add	w16,w16,w21
+	sli	v17.4s,v28.4s,#12
+	eor	w9,w9,w13
+	sli	v21.4s,v29.4s,#12
+	eor	w10,w10,w14
+	add	v0.4s,v0.4s,v1.4s
+	eor	w11,w11,w15
+	add	v4.4s,v4.4s,v5.4s
+	eor	w12,w12,w16
+	add	v8.4s,v8.4s,v9.4s
+	ror	w9,w9,#25
+	add	v12.4s,v12.4s,v13.4s
+	ror	w10,w10,#25
+	add	v16.4s,v16.4s,v17.4s
+	ror	w11,w11,#25
+	add	v20.4s,v20.4s,v21.4s
+	ror	w12,w12,#25
+	eor	v24.16b,v3.16b,v0.16b
+	add	w5,w5,w10
+	eor	v25.16b,v7.16b,v4.16b
+	add	w6,w6,w11
+	eor	v26.16b,v11.16b,v8.16b
+	add	w7,w7,w12
+	eor	v27.16b,v15.16b,v12.16b
+	add	w8,w8,w9
+	eor	v28.16b,v19.16b,v16.16b
+	eor	w21,w21,w5
+	eor	v29.16b,v23.16b,v20.16b
+	eor	w17,w17,w6
+	ushr	v3.4s,v24.4s,#24
+	eor	w19,w19,w7
+	ushr	v7.4s,v25.4s,#24
+	eor	w20,w20,w8
+	ushr	v11.4s,v26.4s,#24
+	ror	w21,w21,#16
+	ushr	v15.4s,v27.4s,#24
+	ror	w17,w17,#16
+	ushr	v19.4s,v28.4s,#24
+	ror	w19,w19,#16
+	ushr	v23.4s,v29.4s,#24
+	ror	w20,w20,#16
+	sli	v3.4s,v24.4s,#8
+	add	w15,w15,w21
+	sli	v7.4s,v25.4s,#8
+	add	w16,w16,w17
+	sli	v11.4s,v26.4s,#8
+	add	w13,w13,w19
+	sli	v15.4s,v27.4s,#8
+	add	w14,w14,w20
+	sli	v19.4s,v28.4s,#8
+	eor	w10,w10,w15
+	sli	v23.4s,v29.4s,#8
+	eor	w11,w11,w16
+	add	v2.4s,v2.4s,v3.4s
+	eor	w12,w12,w13
+	add	v6.4s,v6.4s,v7.4s
+	eor	w9,w9,w14
+	add	v10.4s,v10.4s,v11.4s
+	ror	w10,w10,#20
+	add	v14.4s,v14.4s,v15.4s
+	ror	w11,w11,#20
+	add	v18.4s,v18.4s,v19.4s
+	ror	w12,w12,#20
+	add	v22.4s,v22.4s,v23.4s
+	ror	w9,w9,#20
+	eor	v24.16b,v1.16b,v2.16b
+	add	w5,w5,w10
+	eor	v25.16b,v5.16b,v6.16b
+	add	w6,w6,w11
+	eor	v26.16b,v9.16b,v10.16b
+	add	w7,w7,w12
+	eor	v27.16b,v13.16b,v14.16b
+	add	w8,w8,w9
+	eor	v28.16b,v17.16b,v18.16b
+	eor	w21,w21,w5
+	eor	v29.16b,v21.16b,v22.16b
+	eor	w17,w17,w6
+	ushr	v1.4s,v24.4s,#25
+	eor	w19,w19,w7
+	ushr	v5.4s,v25.4s,#25
+	eor	w20,w20,w8
+	ushr	v9.4s,v26.4s,#25
+	ror	w21,w21,#24
+	ushr	v13.4s,v27.4s,#25
+	ror	w17,w17,#24
+	ushr	v17.4s,v28.4s,#25
+	ror	w19,w19,#24
+	ushr	v21.4s,v29.4s,#25
+	ror	w20,w20,#24
+	sli	v1.4s,v24.4s,#7
+	add	w15,w15,w21
+	sli	v5.4s,v25.4s,#7
+	add	w16,w16,w17
+	sli	v9.4s,v26.4s,#7
+	add	w13,w13,w19
+	sli	v13.4s,v27.4s,#7
+	add	w14,w14,w20
+	sli	v17.4s,v28.4s,#7
+	eor	w10,w10,w15
+	sli	v21.4s,v29.4s,#7
+	eor	w11,w11,w16
+	ext	v2.16b,v2.16b,v2.16b,#8
+	eor	w12,w12,w13
+	ext	v6.16b,v6.16b,v6.16b,#8
+	eor	w9,w9,w14
+	ext	v10.16b,v10.16b,v10.16b,#8
+	ror	w10,w10,#25
+	ext	v14.16b,v14.16b,v14.16b,#8
+	ror	w11,w11,#25
+	ext	v18.16b,v18.16b,v18.16b,#8
+	ror	w12,w12,#25
+	ext	v22.16b,v22.16b,v22.16b,#8
+	ror	w9,w9,#25
+	ext	v3.16b,v3.16b,v3.16b,#4
+	ext	v7.16b,v7.16b,v7.16b,#4
+	ext	v11.16b,v11.16b,v11.16b,#4
+	ext	v15.16b,v15.16b,v15.16b,#4
+	ext	v19.16b,v19.16b,v19.16b,#4
+	ext	v23.16b,v23.16b,v23.16b,#4
+	ext	v1.16b,v1.16b,v1.16b,#12
+	ext	v5.16b,v5.16b,v5.16b,#12
+	ext	v9.16b,v9.16b,v9.16b,#12
+	ext	v13.16b,v13.16b,v13.16b,#12
+	ext	v17.16b,v17.16b,v17.16b,#12
+	ext	v21.16b,v21.16b,v21.16b,#12
+	cbnz	x4,.Loop_upper_neon
+
+	add	w5,w5,w22		// accumulate key block
+	add	x6,x6,x22,lsr#32
+	add	w7,w7,w23
+	add	x8,x8,x23,lsr#32
+	add	w9,w9,w24
+	add	x10,x10,x24,lsr#32
+	add	w11,w11,w25
+	add	x12,x12,x25,lsr#32
+	add	w13,w13,w26
+	add	x14,x14,x26,lsr#32
+	add	w15,w15,w27
+	add	x16,x16,x27,lsr#32
+	add	w17,w17,w28
+	add	x19,x19,x28,lsr#32
+	add	w20,w20,w30
+	add	x21,x21,x30,lsr#32
+
+	add	x5,x5,x6,lsl#32	// pack
+	add	x7,x7,x8,lsl#32
+	ldp	x6,x8,[x1,#0]		// load input
+	add	x9,x9,x10,lsl#32
+	add	x11,x11,x12,lsl#32
+	ldp	x10,x12,[x1,#16]
+	add	x13,x13,x14,lsl#32
+	add	x15,x15,x16,lsl#32
+	ldp	x14,x16,[x1,#32]
+	add	x17,x17,x19,lsl#32
+	add	x20,x20,x21,lsl#32
+	ldp	x19,x21,[x1,#48]
+	add	x1,x1,#64
+#ifdef	__ARMEB__
+	rev	x5,x5
+	rev	x7,x7
+	rev	x9,x9
+	rev	x11,x11
+	rev	x13,x13
+	rev	x15,x15
+	rev	x17,x17
+	rev	x20,x20
+#endif
+	eor	x5,x5,x6
+	eor	x7,x7,x8
+	eor	x9,x9,x10
+	eor	x11,x11,x12
+	eor	x13,x13,x14
+	eor	x15,x15,x16
+	eor	x17,x17,x19
+	eor	x20,x20,x21
+
+	stp	x5,x7,[x0,#0]		// store output
+	add	x28,x28,#1			// increment counter
+	mov	w5,w22			// unpack key block
+	lsr	x6,x22,#32
+	stp	x9,x11,[x0,#16]
+	mov	w7,w23
+	lsr	x8,x23,#32
+	stp	x13,x15,[x0,#32]
+	mov	w9,w24
+	lsr	x10,x24,#32
+	stp	x17,x20,[x0,#48]
+	add	x0,x0,#64
+	mov	w11,w25
+	lsr	x12,x25,#32
+	mov	w13,w26
+	lsr	x14,x26,#32
+	mov	w15,w27
+	lsr	x16,x27,#32
+	mov	w17,w28
+	lsr	x19,x28,#32
+	mov	w20,w30
+	lsr	x21,x30,#32
+
+	mov	x4,#5
+.Loop_lower_neon:
+	sub	x4,x4,#1
+	add	v0.4s,v0.4s,v1.4s
+	add	w5,w5,w9
+	add	v4.4s,v4.4s,v5.4s
+	add	w6,w6,w10
+	add	v8.4s,v8.4s,v9.4s
+	add	w7,w7,w11
+	add	v12.4s,v12.4s,v13.4s
+	add	w8,w8,w12
+	add	v16.4s,v16.4s,v17.4s
+	eor	w17,w17,w5
+	add	v20.4s,v20.4s,v21.4s
+	eor	w19,w19,w6
+	eor	v3.16b,v3.16b,v0.16b
+	eor	w20,w20,w7
+	eor	v7.16b,v7.16b,v4.16b
+	eor	w21,w21,w8
+	eor	v11.16b,v11.16b,v8.16b
+	ror	w17,w17,#16
+	eor	v15.16b,v15.16b,v12.16b
+	ror	w19,w19,#16
+	eor	v19.16b,v19.16b,v16.16b
+	ror	w20,w20,#16
+	eor	v23.16b,v23.16b,v20.16b
+	ror	w21,w21,#16
+	rev32	v3.8h,v3.8h
+	add	w13,w13,w17
+	rev32	v7.8h,v7.8h
+	add	w14,w14,w19
+	rev32	v11.8h,v11.8h
+	add	w15,w15,w20
+	rev32	v15.8h,v15.8h
+	add	w16,w16,w21
+	rev32	v19.8h,v19.8h
+	eor	w9,w9,w13
+	rev32	v23.8h,v23.8h
+	eor	w10,w10,w14
+	add	v2.4s,v2.4s,v3.4s
+	eor	w11,w11,w15
+	add	v6.4s,v6.4s,v7.4s
+	eor	w12,w12,w16
+	add	v10.4s,v10.4s,v11.4s
+	ror	w9,w9,#20
+	add	v14.4s,v14.4s,v15.4s
+	ror	w10,w10,#20
+	add	v18.4s,v18.4s,v19.4s
+	ror	w11,w11,#20
+	add	v22.4s,v22.4s,v23.4s
+	ror	w12,w12,#20
+	eor	v24.16b,v1.16b,v2.16b
+	add	w5,w5,w9
+	eor	v25.16b,v5.16b,v6.16b
+	add	w6,w6,w10
+	eor	v26.16b,v9.16b,v10.16b
+	add	w7,w7,w11
+	eor	v27.16b,v13.16b,v14.16b
+	add	w8,w8,w12
+	eor	v28.16b,v17.16b,v18.16b
+	eor	w17,w17,w5
+	eor	v29.16b,v21.16b,v22.16b
+	eor	w19,w19,w6
+	ushr	v1.4s,v24.4s,#20
+	eor	w20,w20,w7
+	ushr	v5.4s,v25.4s,#20
+	eor	w21,w21,w8
+	ushr	v9.4s,v26.4s,#20
+	ror	w17,w17,#24
+	ushr	v13.4s,v27.4s,#20
+	ror	w19,w19,#24
+	ushr	v17.4s,v28.4s,#20
+	ror	w20,w20,#24
+	ushr	v21.4s,v29.4s,#20
+	ror	w21,w21,#24
+	sli	v1.4s,v24.4s,#12
+	add	w13,w13,w17
+	sli	v5.4s,v25.4s,#12
+	add	w14,w14,w19
+	sli	v9.4s,v26.4s,#12
+	add	w15,w15,w20
+	sli	v13.4s,v27.4s,#12
+	add	w16,w16,w21
+	sli	v17.4s,v28.4s,#12
+	eor	w9,w9,w13
+	sli	v21.4s,v29.4s,#12
+	eor	w10,w10,w14
+	add	v0.4s,v0.4s,v1.4s
+	eor	w11,w11,w15
+	add	v4.4s,v4.4s,v5.4s
+	eor	w12,w12,w16
+	add	v8.4s,v8.4s,v9.4s
+	ror	w9,w9,#25
+	add	v12.4s,v12.4s,v13.4s
+	ror	w10,w10,#25
+	add	v16.4s,v16.4s,v17.4s
+	ror	w11,w11,#25
+	add	v20.4s,v20.4s,v21.4s
+	ror	w12,w12,#25
+	eor	v24.16b,v3.16b,v0.16b
+	add	w5,w5,w10
+	eor	v25.16b,v7.16b,v4.16b
+	add	w6,w6,w11
+	eor	v26.16b,v11.16b,v8.16b
+	add	w7,w7,w12
+	eor	v27.16b,v15.16b,v12.16b
+	add	w8,w8,w9
+	eor	v28.16b,v19.16b,v16.16b
+	eor	w21,w21,w5
+	eor	v29.16b,v23.16b,v20.16b
+	eor	w17,w17,w6
+	ushr	v3.4s,v24.4s,#24
+	eor	w19,w19,w7
+	ushr	v7.4s,v25.4s,#24
+	eor	w20,w20,w8
+	ushr	v11.4s,v26.4s,#24
+	ror	w21,w21,#16
+	ushr	v15.4s,v27.4s,#24
+	ror	w17,w17,#16
+	ushr	v19.4s,v28.4s,#24
+	ror	w19,w19,#16
+	ushr	v23.4s,v29.4s,#24
+	ror	w20,w20,#16
+	sli	v3.4s,v24.4s,#8
+	add	w15,w15,w21
+	sli	v7.4s,v25.4s,#8
+	add	w16,w16,w17
+	sli	v11.4s,v26.4s,#8
+	add	w13,w13,w19
+	sli	v15.4s,v27.4s,#8
+	add	w14,w14,w20
+	sli	v19.4s,v28.4s,#8
+	eor	w10,w10,w15
+	sli	v23.4s,v29.4s,#8
+	eor	w11,w11,w16
+	add	v2.4s,v2.4s,v3.4s
+	eor	w12,w12,w13
+	add	v6.4s,v6.4s,v7.4s
+	eor	w9,w9,w14
+	add	v10.4s,v10.4s,v11.4s
+	ror	w10,w10,#20
+	add	v14.4s,v14.4s,v15.4s
+	ror	w11,w11,#20
+	add	v18.4s,v18.4s,v19.4s
+	ror	w12,w12,#20
+	add	v22.4s,v22.4s,v23.4s
+	ror	w9,w9,#20
+	eor	v24.16b,v1.16b,v2.16b
+	add	w5,w5,w10
+	eor	v25.16b,v5.16b,v6.16b
+	add	w6,w6,w11
+	eor	v26.16b,v9.16b,v10.16b
+	add	w7,w7,w12
+	eor	v27.16b,v13.16b,v14.16b
+	add	w8,w8,w9
+	eor	v28.16b,v17.16b,v18.16b
+	eor	w21,w21,w5
+	eor	v29.16b,v21.16b,v22.16b
+	eor	w17,w17,w6
+	ushr	v1.4s,v24.4s,#25
+	eor	w19,w19,w7
+	ushr	v5.4s,v25.4s,#25
+	eor	w20,w20,w8
+	ushr	v9.4s,v26.4s,#25
+	ror	w21,w21,#24
+	ushr	v13.4s,v27.4s,#25
+	ror	w17,w17,#24
+	ushr	v17.4s,v28.4s,#25
+	ror	w19,w19,#24
+	ushr	v21.4s,v29.4s,#25
+	ror	w20,w20,#24
+	sli	v1.4s,v24.4s,#7
+	add	w15,w15,w21
+	sli	v5.4s,v25.4s,#7
+	add	w16,w16,w17
+	sli	v9.4s,v26.4s,#7
+	add	w13,w13,w19
+	sli	v13.4s,v27.4s,#7
+	add	w14,w14,w20
+	sli	v17.4s,v28.4s,#7
+	eor	w10,w10,w15
+	sli	v21.4s,v29.4s,#7
+	eor	w11,w11,w16
+	ext	v2.16b,v2.16b,v2.16b,#8
+	eor	w12,w12,w13
+	ext	v6.16b,v6.16b,v6.16b,#8
+	eor	w9,w9,w14
+	ext	v10.16b,v10.16b,v10.16b,#8
+	ror	w10,w10,#25
+	ext	v14.16b,v14.16b,v14.16b,#8
+	ror	w11,w11,#25
+	ext	v18.16b,v18.16b,v18.16b,#8
+	ror	w12,w12,#25
+	ext	v22.16b,v22.16b,v22.16b,#8
+	ror	w9,w9,#25
+	ext	v3.16b,v3.16b,v3.16b,#12
+	ext	v7.16b,v7.16b,v7.16b,#12
+	ext	v11.16b,v11.16b,v11.16b,#12
+	ext	v15.16b,v15.16b,v15.16b,#12
+	ext	v19.16b,v19.16b,v19.16b,#12
+	ext	v23.16b,v23.16b,v23.16b,#12
+	ext	v1.16b,v1.16b,v1.16b,#4
+	ext	v5.16b,v5.16b,v5.16b,#4
+	ext	v9.16b,v9.16b,v9.16b,#4
+	ext	v13.16b,v13.16b,v13.16b,#4
+	ext	v17.16b,v17.16b,v17.16b,#4
+	ext	v21.16b,v21.16b,v21.16b,#4
+	add	v0.4s,v0.4s,v1.4s
+	add	w5,w5,w9
+	add	v4.4s,v4.4s,v5.4s
+	add	w6,w6,w10
+	add	v8.4s,v8.4s,v9.4s
+	add	w7,w7,w11
+	add	v12.4s,v12.4s,v13.4s
+	add	w8,w8,w12
+	add	v16.4s,v16.4s,v17.4s
+	eor	w17,w17,w5
+	add	v20.4s,v20.4s,v21.4s
+	eor	w19,w19,w6
+	eor	v3.16b,v3.16b,v0.16b
+	eor	w20,w20,w7
+	eor	v7.16b,v7.16b,v4.16b
+	eor	w21,w21,w8
+	eor	v11.16b,v11.16b,v8.16b
+	ror	w17,w17,#16
+	eor	v15.16b,v15.16b,v12.16b
+	ror	w19,w19,#16
+	eor	v19.16b,v19.16b,v16.16b
+	ror	w20,w20,#16
+	eor	v23.16b,v23.16b,v20.16b
+	ror	w21,w21,#16
+	rev32	v3.8h,v3.8h
+	add	w13,w13,w17
+	rev32	v7.8h,v7.8h
+	add	w14,w14,w19
+	rev32	v11.8h,v11.8h
+	add	w15,w15,w20
+	rev32	v15.8h,v15.8h
+	add	w16,w16,w21
+	rev32	v19.8h,v19.8h
+	eor	w9,w9,w13
+	rev32	v23.8h,v23.8h
+	eor	w10,w10,w14
+	add	v2.4s,v2.4s,v3.4s
+	eor	w11,w11,w15
+	add	v6.4s,v6.4s,v7.4s
+	eor	w12,w12,w16
+	add	v10.4s,v10.4s,v11.4s
+	ror	w9,w9,#20
+	add	v14.4s,v14.4s,v15.4s
+	ror	w10,w10,#20
+	add	v18.4s,v18.4s,v19.4s
+	ror	w11,w11,#20
+	add	v22.4s,v22.4s,v23.4s
+	ror	w12,w12,#20
+	eor	v24.16b,v1.16b,v2.16b
+	add	w5,w5,w9
+	eor	v25.16b,v5.16b,v6.16b
+	add	w6,w6,w10
+	eor	v26.16b,v9.16b,v10.16b
+	add	w7,w7,w11
+	eor	v27.16b,v13.16b,v14.16b
+	add	w8,w8,w12
+	eor	v28.16b,v17.16b,v18.16b
+	eor	w17,w17,w5
+	eor	v29.16b,v21.16b,v22.16b
+	eor	w19,w19,w6
+	ushr	v1.4s,v24.4s,#20
+	eor	w20,w20,w7
+	ushr	v5.4s,v25.4s,#20
+	eor	w21,w21,w8
+	ushr	v9.4s,v26.4s,#20
+	ror	w17,w17,#24
+	ushr	v13.4s,v27.4s,#20
+	ror	w19,w19,#24
+	ushr	v17.4s,v28.4s,#20
+	ror	w20,w20,#24
+	ushr	v21.4s,v29.4s,#20
+	ror	w21,w21,#24
+	sli	v1.4s,v24.4s,#12
+	add	w13,w13,w17
+	sli	v5.4s,v25.4s,#12
+	add	w14,w14,w19
+	sli	v9.4s,v26.4s,#12
+	add	w15,w15,w20
+	sli	v13.4s,v27.4s,#12
+	add	w16,w16,w21
+	sli	v17.4s,v28.4s,#12
+	eor	w9,w9,w13
+	sli	v21.4s,v29.4s,#12
+	eor	w10,w10,w14
+	add	v0.4s,v0.4s,v1.4s
+	eor	w11,w11,w15
+	add	v4.4s,v4.4s,v5.4s
+	eor	w12,w12,w16
+	add	v8.4s,v8.4s,v9.4s
+	ror	w9,w9,#25
+	add	v12.4s,v12.4s,v13.4s
+	ror	w10,w10,#25
+	add	v16.4s,v16.4s,v17.4s
+	ror	w11,w11,#25
+	add	v20.4s,v20.4s,v21.4s
+	ror	w12,w12,#25
+	eor	v24.16b,v3.16b,v0.16b
+	add	w5,w5,w10
+	eor	v25.16b,v7.16b,v4.16b
+	add	w6,w6,w11
+	eor	v26.16b,v11.16b,v8.16b
+	add	w7,w7,w12
+	eor	v27.16b,v15.16b,v12.16b
+	add	w8,w8,w9
+	eor	v28.16b,v19.16b,v16.16b
+	eor	w21,w21,w5
+	eor	v29.16b,v23.16b,v20.16b
+	eor	w17,w17,w6
+	ushr	v3.4s,v24.4s,#24
+	eor	w19,w19,w7
+	ushr	v7.4s,v25.4s,#24
+	eor	w20,w20,w8
+	ushr	v11.4s,v26.4s,#24
+	ror	w21,w21,#16
+	ushr	v15.4s,v27.4s,#24
+	ror	w17,w17,#16
+	ushr	v19.4s,v28.4s,#24
+	ror	w19,w19,#16
+	ushr	v23.4s,v29.4s,#24
+	ror	w20,w20,#16
+	sli	v3.4s,v24.4s,#8
+	add	w15,w15,w21
+	sli	v7.4s,v25.4s,#8
+	add	w16,w16,w17
+	sli	v11.4s,v26.4s,#8
+	add	w13,w13,w19
+	sli	v15.4s,v27.4s,#8
+	add	w14,w14,w20
+	sli	v19.4s,v28.4s,#8
+	eor	w10,w10,w15
+	sli	v23.4s,v29.4s,#8
+	eor	w11,w11,w16
+	add	v2.4s,v2.4s,v3.4s
+	eor	w12,w12,w13
+	add	v6.4s,v6.4s,v7.4s
+	eor	w9,w9,w14
+	add	v10.4s,v10.4s,v11.4s
+	ror	w10,w10,#20
+	add	v14.4s,v14.4s,v15.4s
+	ror	w11,w11,#20
+	add	v18.4s,v18.4s,v19.4s
+	ror	w12,w12,#20
+	add	v22.4s,v22.4s,v23.4s
+	ror	w9,w9,#20
+	eor	v24.16b,v1.16b,v2.16b
+	add	w5,w5,w10
+	eor	v25.16b,v5.16b,v6.16b
+	add	w6,w6,w11
+	eor	v26.16b,v9.16b,v10.16b
+	add	w7,w7,w12
+	eor	v27.16b,v13.16b,v14.16b
+	add	w8,w8,w9
+	eor	v28.16b,v17.16b,v18.16b
+	eor	w21,w21,w5
+	eor	v29.16b,v21.16b,v22.16b
+	eor	w17,w17,w6
+	ushr	v1.4s,v24.4s,#25
+	eor	w19,w19,w7
+	ushr	v5.4s,v25.4s,#25
+	eor	w20,w20,w8
+	ushr	v9.4s,v26.4s,#25
+	ror	w21,w21,#24
+	ushr	v13.4s,v27.4s,#25
+	ror	w17,w17,#24
+	ushr	v17.4s,v28.4s,#25
+	ror	w19,w19,#24
+	ushr	v21.4s,v29.4s,#25
+	ror	w20,w20,#24
+	sli	v1.4s,v24.4s,#7
+	add	w15,w15,w21
+	sli	v5.4s,v25.4s,#7
+	add	w16,w16,w17
+	sli	v9.4s,v26.4s,#7
+	add	w13,w13,w19
+	sli	v13.4s,v27.4s,#7
+	add	w14,w14,w20
+	sli	v17.4s,v28.4s,#7
+	eor	w10,w10,w15
+	sli	v21.4s,v29.4s,#7
+	eor	w11,w11,w16
+	ext	v2.16b,v2.16b,v2.16b,#8
+	eor	w12,w12,w13
+	ext	v6.16b,v6.16b,v6.16b,#8
+	eor	w9,w9,w14
+	ext	v10.16b,v10.16b,v10.16b,#8
+	ror	w10,w10,#25
+	ext	v14.16b,v14.16b,v14.16b,#8
+	ror	w11,w11,#25
+	ext	v18.16b,v18.16b,v18.16b,#8
+	ror	w12,w12,#25
+	ext	v22.16b,v22.16b,v22.16b,#8
+	ror	w9,w9,#25
+	ext	v3.16b,v3.16b,v3.16b,#4
+	ext	v7.16b,v7.16b,v7.16b,#4
+	ext	v11.16b,v11.16b,v11.16b,#4
+	ext	v15.16b,v15.16b,v15.16b,#4
+	ext	v19.16b,v19.16b,v19.16b,#4
+	ext	v23.16b,v23.16b,v23.16b,#4
+	ext	v1.16b,v1.16b,v1.16b,#12
+	ext	v5.16b,v5.16b,v5.16b,#12
+	ext	v9.16b,v9.16b,v9.16b,#12
+	ext	v13.16b,v13.16b,v13.16b,#12
+	ext	v17.16b,v17.16b,v17.16b,#12
+	ext	v21.16b,v21.16b,v21.16b,#12
+	cbnz	x4,.Loop_lower_neon
+
+	add	w5,w5,w22		// accumulate key block
+	ldp	q24,q25,[sp,#0]
+	add	x6,x6,x22,lsr#32
+	ldp	q26,q27,[sp,#32]
+	add	w7,w7,w23
+	ldp	q28,q29,[sp,#64]
+	add	x8,x8,x23,lsr#32
+	add	v0.4s,v0.4s,v24.4s
+	add	w9,w9,w24
+	add	v4.4s,v4.4s,v24.4s
+	add	x10,x10,x24,lsr#32
+	add	v8.4s,v8.4s,v24.4s
+	add	w11,w11,w25
+	add	v12.4s,v12.4s,v24.4s
+	add	x12,x12,x25,lsr#32
+	add	v16.4s,v16.4s,v24.4s
+	add	w13,w13,w26
+	add	v20.4s,v20.4s,v24.4s
+	add	x14,x14,x26,lsr#32
+	add	v2.4s,v2.4s,v26.4s
+	add	w15,w15,w27
+	add	v6.4s,v6.4s,v26.4s
+	add	x16,x16,x27,lsr#32
+	add	v10.4s,v10.4s,v26.4s
+	add	w17,w17,w28
+	add	v14.4s,v14.4s,v26.4s
+	add	x19,x19,x28,lsr#32
+	add	v18.4s,v18.4s,v26.4s
+	add	w20,w20,w30
+	add	v22.4s,v22.4s,v26.4s
+	add	x21,x21,x30,lsr#32
+	add	v19.4s,v19.4s,v31.4s			// +4
+	add	x5,x5,x6,lsl#32	// pack
+	add	v23.4s,v23.4s,v31.4s			// +4
+	add	x7,x7,x8,lsl#32
+	add	v3.4s,v3.4s,v27.4s
+	ldp	x6,x8,[x1,#0]		// load input
+	add	v7.4s,v7.4s,v28.4s
+	add	x9,x9,x10,lsl#32
+	add	v11.4s,v11.4s,v29.4s
+	add	x11,x11,x12,lsl#32
+	add	v15.4s,v15.4s,v30.4s
+	ldp	x10,x12,[x1,#16]
+	add	v19.4s,v19.4s,v27.4s
+	add	x13,x13,x14,lsl#32
+	add	v23.4s,v23.4s,v28.4s
+	add	x15,x15,x16,lsl#32
+	add	v1.4s,v1.4s,v25.4s
+	ldp	x14,x16,[x1,#32]
+	add	v5.4s,v5.4s,v25.4s
+	add	x17,x17,x19,lsl#32
+	add	v9.4s,v9.4s,v25.4s
+	add	x20,x20,x21,lsl#32
+	add	v13.4s,v13.4s,v25.4s
+	ldp	x19,x21,[x1,#48]
+	add	v17.4s,v17.4s,v25.4s
+	add	x1,x1,#64
+	add	v21.4s,v21.4s,v25.4s
+
+#ifdef	__ARMEB__
+	rev	x5,x5
+	rev	x7,x7
+	rev	x9,x9
+	rev	x11,x11
+	rev	x13,x13
+	rev	x15,x15
+	rev	x17,x17
+	rev	x20,x20
+#endif
+	ld1	{v24.16b,v25.16b,v26.16b,v27.16b},[x1],#64
+	eor	x5,x5,x6
+	eor	x7,x7,x8
+	eor	x9,x9,x10
+	eor	x11,x11,x12
+	eor	x13,x13,x14
+	eor	v0.16b,v0.16b,v24.16b
+	eor	x15,x15,x16
+	eor	v1.16b,v1.16b,v25.16b
+	eor	x17,x17,x19
+	eor	v2.16b,v2.16b,v26.16b
+	eor	x20,x20,x21
+	eor	v3.16b,v3.16b,v27.16b
+	ld1	{v24.16b,v25.16b,v26.16b,v27.16b},[x1],#64
+
+	stp	x5,x7,[x0,#0]		// store output
+	add	x28,x28,#7			// increment counter
+	stp	x9,x11,[x0,#16]
+	stp	x13,x15,[x0,#32]
+	stp	x17,x20,[x0,#48]
+	add	x0,x0,#64
+	st1	{v0.16b,v1.16b,v2.16b,v3.16b},[x0],#64
+
+	ld1	{v0.16b,v1.16b,v2.16b,v3.16b},[x1],#64
+	eor	v4.16b,v4.16b,v24.16b
+	eor	v5.16b,v5.16b,v25.16b
+	eor	v6.16b,v6.16b,v26.16b
+	eor	v7.16b,v7.16b,v27.16b
+	st1	{v4.16b,v5.16b,v6.16b,v7.16b},[x0],#64
+
+	ld1	{v4.16b,v5.16b,v6.16b,v7.16b},[x1],#64
+	eor	v8.16b,v8.16b,v0.16b
+	ldp	q24,q25,[sp,#0]
+	eor	v9.16b,v9.16b,v1.16b
+	ldp	q26,q27,[sp,#32]
+	eor	v10.16b,v10.16b,v2.16b
+	eor	v11.16b,v11.16b,v3.16b
+	st1	{v8.16b,v9.16b,v10.16b,v11.16b},[x0],#64
+
+	ld1	{v8.16b,v9.16b,v10.16b,v11.16b},[x1],#64
+	eor	v12.16b,v12.16b,v4.16b
+	eor	v13.16b,v13.16b,v5.16b
+	eor	v14.16b,v14.16b,v6.16b
+	eor	v15.16b,v15.16b,v7.16b
+	st1	{v12.16b,v13.16b,v14.16b,v15.16b},[x0],#64
+
+	ld1	{v12.16b,v13.16b,v14.16b,v15.16b},[x1],#64
+	eor	v16.16b,v16.16b,v8.16b
+	eor	v17.16b,v17.16b,v9.16b
+	eor	v18.16b,v18.16b,v10.16b
+	eor	v19.16b,v19.16b,v11.16b
+	st1	{v16.16b,v17.16b,v18.16b,v19.16b},[x0],#64
+
+	shl	v0.4s,v31.4s,#1			// 4 -> 8
+	eor	v20.16b,v20.16b,v12.16b
+	eor	v21.16b,v21.16b,v13.16b
+	eor	v22.16b,v22.16b,v14.16b
+	eor	v23.16b,v23.16b,v15.16b
+	st1	{v20.16b,v21.16b,v22.16b,v23.16b},[x0],#64
+
+	add	v27.4s,v27.4s,v0.4s			// += 8
+	add	v28.4s,v28.4s,v0.4s
+	add	v29.4s,v29.4s,v0.4s
+	add	v30.4s,v30.4s,v0.4s
+
+	b.hs	.Loop_outer_512_neon
+
+	adds	x2,x2,#512
+	ushr	v0.4s,v31.4s,#2			// 4 -> 1
+
+	ldp	d8,d9,[sp,#128+0]		// meet ABI requirements
+	ldp	d10,d11,[sp,#128+16]
+	ldp	d12,d13,[sp,#128+32]
+	ldp	d14,d15,[sp,#128+48]
+
+	stp	q24,q31,[sp,#0]		// wipe off-load area
+	stp	q24,q31,[sp,#32]
+	stp	q24,q31,[sp,#64]
+
+	b.eq	.Ldone_512_neon
+
+	cmp	x2,#192
+	sub	v27.4s,v27.4s,v0.4s			// -= 1
+	sub	v28.4s,v28.4s,v0.4s
+	sub	v29.4s,v29.4s,v0.4s
+	add	sp,sp,#128
+	b.hs	.Loop_outer_neon
+
+	eor	v25.16b,v25.16b,v25.16b
+	eor	v26.16b,v26.16b,v26.16b
+	eor	v27.16b,v27.16b,v27.16b
+	eor	v28.16b,v28.16b,v28.16b
+	eor	v29.16b,v29.16b,v29.16b
+	eor	v30.16b,v30.16b,v30.16b
+	b	.Loop_outer
+
+.Ldone_512_neon:
+	ldp	x19,x20,[x29,#16]
+	add	sp,sp,#128+64
+	ldp	x21,x22,[x29,#32]
+	ldp	x23,x24,[x29,#48]
+	ldp	x25,x26,[x29,#64]
+	ldp	x27,x28,[x29,#80]
+	ldp	x29,x30,[sp],#96
+.Labort_neon:
+	ret
+ENDPROC(chacha20_neon)
diff --git b/net/wireguard/crypto/chacha20-x86_64.S b/net/wireguard/crypto/chacha20-x86_64.S
new file mode 100644
index 0000000..39d7080
--- /dev/null
+++ b/net/wireguard/crypto/chacha20-x86_64.S
@@ -0,0 +1,2630 @@
+/* SPDX-License-Identifier: OpenSSL OR (BSD-3-Clause OR GPL-2.0)
+ *
+ * Copyright (C) 2017 Samuel Neves <sneves@dei.uc.pt>. All Rights Reserved.
+ * Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ * Copyright 2016 The OpenSSL Project Authors. All Rights Reserved.
+ */
+
+#include <linux/linkage.h>
+
+.section .rodata.cst16.Lzero, "aM", @progbits, 16
+.align	16
+.Lzero:
+.long	0,0,0,0
+.section .rodata.cst16.Lone, "aM", @progbits, 16
+.align	16
+.Lone:
+.long	1,0,0,0
+.section .rodata.cst16.Linc, "aM", @progbits, 16
+.align	16
+.Linc:
+.long	0,1,2,3
+.section .rodata.cst16.Lfour, "aM", @progbits, 16
+.align	16
+.Lfour:
+.long	4,4,4,4
+.section .rodata.cst32.Lincy, "aM", @progbits, 32
+.align	32
+.Lincy:
+.long	0,2,4,6,1,3,5,7
+.section .rodata.cst32.Leight, "aM", @progbits, 32
+.align	32
+.Leight:
+.long	8,8,8,8,8,8,8,8
+.section .rodata.cst16.Lrot16, "aM", @progbits, 16
+.align	16
+.Lrot16:
+.byte	0x2,0x3,0x0,0x1, 0x6,0x7,0x4,0x5, 0xa,0xb,0x8,0x9, 0xe,0xf,0xc,0xd
+.section .rodata.cst16.Lrot24, "aM", @progbits, 16
+.align	16
+.Lrot24:
+.byte	0x3,0x0,0x1,0x2, 0x7,0x4,0x5,0x6, 0xb,0x8,0x9,0xa, 0xf,0xc,0xd,0xe
+.section .rodata.cst16.Lsigma, "aM", @progbits, 16
+.align	16
+.Lsigma:
+.byte	101,120,112,97,110,100,32,51,50,45,98,121,116,101,32,107,0
+.section .rodata.cst64.Lzeroz, "aM", @progbits, 64
+.align	64
+.Lzeroz:
+.long	0,0,0,0, 1,0,0,0, 2,0,0,0, 3,0,0,0
+.section .rodata.cst64.Lfourz, "aM", @progbits, 64
+.align	64
+.Lfourz:
+.long	4,0,0,0, 4,0,0,0, 4,0,0,0, 4,0,0,0
+.section .rodata.cst64.Lincz, "aM", @progbits, 64
+.align	64
+.Lincz:
+.long	0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
+.section .rodata.cst64.Lsixteen, "aM", @progbits, 64
+.align	64
+.Lsixteen:
+.long	16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16
+.section .rodata.cst32.Ltwoy, "aM", @progbits, 32
+.align	64
+.Ltwoy:
+.long	2,0,0,0, 2,0,0,0
+
+.text
+
+#ifdef CONFIG_AS_SSSE3
+.align	32
+ENTRY(hchacha20_ssse3)
+	movdqa	.Lsigma(%rip),%xmm0
+	movdqu	(%rdx),%xmm1
+	movdqu	16(%rdx),%xmm2
+	movdqu	(%rsi),%xmm3
+	movdqa	.Lrot16(%rip),%xmm6
+	movdqa	.Lrot24(%rip),%xmm7
+	movq	$10,%r8
+	.align	32
+.Loop_hssse3:
+	paddd	%xmm1,%xmm0
+	pxor	%xmm0,%xmm3
+	pshufb	%xmm6,%xmm3
+	paddd	%xmm3,%xmm2
+	pxor	%xmm2,%xmm1
+	movdqa	%xmm1,%xmm4
+	psrld	$20,%xmm1
+	pslld	$12,%xmm4
+	por	%xmm4,%xmm1
+	paddd	%xmm1,%xmm0
+	pxor	%xmm0,%xmm3
+	pshufb	%xmm7,%xmm3
+	paddd	%xmm3,%xmm2
+	pxor	%xmm2,%xmm1
+	movdqa	%xmm1,%xmm4
+	psrld	$25,%xmm1
+	pslld	$7,%xmm4
+	por	%xmm4,%xmm1
+	pshufd	$78,%xmm2,%xmm2
+	pshufd	$57,%xmm1,%xmm1
+	pshufd	$147,%xmm3,%xmm3
+	nop
+	paddd	%xmm1,%xmm0
+	pxor	%xmm0,%xmm3
+	pshufb	%xmm6,%xmm3
+	paddd	%xmm3,%xmm2
+	pxor	%xmm2,%xmm1
+	movdqa	%xmm1,%xmm4
+	psrld	$20,%xmm1
+	pslld	$12,%xmm4
+	por	%xmm4,%xmm1
+	paddd	%xmm1,%xmm0
+	pxor	%xmm0,%xmm3
+	pshufb	%xmm7,%xmm3
+	paddd	%xmm3,%xmm2
+	pxor	%xmm2,%xmm1
+	movdqa	%xmm1,%xmm4
+	psrld	$25,%xmm1
+	pslld	$7,%xmm4
+	por	%xmm4,%xmm1
+	pshufd	$78,%xmm2,%xmm2
+	pshufd	$147,%xmm1,%xmm1
+	pshufd	$57,%xmm3,%xmm3
+	decq	%r8
+	jnz	.Loop_hssse3
+	movdqu	%xmm0,0(%rdi)
+	movdqu	%xmm3,16(%rdi)
+	ret
+ENDPROC(hchacha20_ssse3)
+
+.align	32
+ENTRY(chacha20_ssse3)
+.Lchacha20_ssse3:
+	cmpq	$0,%rdx
+	je	.Lssse3_epilogue
+	leaq	8(%rsp),%r10
+
+	cmpq	$128,%rdx
+	ja	.Lchacha20_4x
+
+.Ldo_sse3_after_all:
+	subq	$64+8,%rsp
+	andq	$-32,%rsp
+	movdqa	.Lsigma(%rip),%xmm0
+	movdqu	(%rcx),%xmm1
+	movdqu	16(%rcx),%xmm2
+	movdqu	(%r8),%xmm3
+	movdqa	.Lrot16(%rip),%xmm6
+	movdqa	.Lrot24(%rip),%xmm7
+
+	movdqa	%xmm0,0(%rsp)
+	movdqa	%xmm1,16(%rsp)
+	movdqa	%xmm2,32(%rsp)
+	movdqa	%xmm3,48(%rsp)
+	movq	$10,%r8
+	jmp	.Loop_ssse3
+
+.align	32
+.Loop_outer_ssse3:
+	movdqa	.Lone(%rip),%xmm3
+	movdqa	0(%rsp),%xmm0
+	movdqa	16(%rsp),%xmm1
+	movdqa	32(%rsp),%xmm2
+	paddd	48(%rsp),%xmm3
+	movq	$10,%r8
+	movdqa	%xmm3,48(%rsp)
+	jmp	.Loop_ssse3
+
+.align	32
+.Loop_ssse3:
+	paddd	%xmm1,%xmm0
+	pxor	%xmm0,%xmm3
+	pshufb	%xmm6,%xmm3
+	paddd	%xmm3,%xmm2
+	pxor	%xmm2,%xmm1
+	movdqa	%xmm1,%xmm4
+	psrld	$20,%xmm1
+	pslld	$12,%xmm4
+	por	%xmm4,%xmm1
+	paddd	%xmm1,%xmm0
+	pxor	%xmm0,%xmm3
+	pshufb	%xmm7,%xmm3
+	paddd	%xmm3,%xmm2
+	pxor	%xmm2,%xmm1
+	movdqa	%xmm1,%xmm4
+	psrld	$25,%xmm1
+	pslld	$7,%xmm4
+	por	%xmm4,%xmm1
+	pshufd	$78,%xmm2,%xmm2
+	pshufd	$57,%xmm1,%xmm1
+	pshufd	$147,%xmm3,%xmm3
+	nop
+	paddd	%xmm1,%xmm0
+	pxor	%xmm0,%xmm3
+	pshufb	%xmm6,%xmm3
+	paddd	%xmm3,%xmm2
+	pxor	%xmm2,%xmm1
+	movdqa	%xmm1,%xmm4
+	psrld	$20,%xmm1
+	pslld	$12,%xmm4
+	por	%xmm4,%xmm1
+	paddd	%xmm1,%xmm0
+	pxor	%xmm0,%xmm3
+	pshufb	%xmm7,%xmm3
+	paddd	%xmm3,%xmm2
+	pxor	%xmm2,%xmm1
+	movdqa	%xmm1,%xmm4
+	psrld	$25,%xmm1
+	pslld	$7,%xmm4
+	por	%xmm4,%xmm1
+	pshufd	$78,%xmm2,%xmm2
+	pshufd	$147,%xmm1,%xmm1
+	pshufd	$57,%xmm3,%xmm3
+	decq	%r8
+	jnz	.Loop_ssse3
+	paddd	0(%rsp),%xmm0
+	paddd	16(%rsp),%xmm1
+	paddd	32(%rsp),%xmm2
+	paddd	48(%rsp),%xmm3
+
+	cmpq	$64,%rdx
+	jb	.Ltail_ssse3
+
+	movdqu	0(%rsi),%xmm4
+	movdqu	16(%rsi),%xmm5
+	pxor	%xmm4,%xmm0
+	movdqu	32(%rsi),%xmm4
+	pxor	%xmm5,%xmm1
+	movdqu	48(%rsi),%xmm5
+	leaq	64(%rsi),%rsi
+	pxor	%xmm4,%xmm2
+	pxor	%xmm5,%xmm3
+
+	movdqu	%xmm0,0(%rdi)
+	movdqu	%xmm1,16(%rdi)
+	movdqu	%xmm2,32(%rdi)
+	movdqu	%xmm3,48(%rdi)
+	leaq	64(%rdi),%rdi
+
+	subq	$64,%rdx
+	jnz	.Loop_outer_ssse3
+
+	jmp	.Ldone_ssse3
+
+.align	16
+.Ltail_ssse3:
+	movdqa	%xmm0,0(%rsp)
+	movdqa	%xmm1,16(%rsp)
+	movdqa	%xmm2,32(%rsp)
+	movdqa	%xmm3,48(%rsp)
+	xorq	%r8,%r8
+
+.Loop_tail_ssse3:
+	movzbl	(%rsi,%r8,1),%eax
+	movzbl	(%rsp,%r8,1),%ecx
+	leaq	1(%r8),%r8
+	xorl	%ecx,%eax
+	movb	%al,-1(%rdi,%r8,1)
+	decq	%rdx
+	jnz	.Loop_tail_ssse3
+
+.Ldone_ssse3:
+	leaq	-8(%r10),%rsp
+
+.Lssse3_epilogue:
+	ret
+
+.align	32
+.Lchacha20_4x:
+	leaq	8(%rsp),%r10
+
+.Lproceed4x:
+	subq	$0x140+8,%rsp
+	andq	$-32,%rsp
+	movdqa	.Lsigma(%rip),%xmm11
+	movdqu	(%rcx),%xmm15
+	movdqu	16(%rcx),%xmm7
+	movdqu	(%r8),%xmm3
+	leaq	256(%rsp),%rcx
+	leaq	.Lrot16(%rip),%r9
+	leaq	.Lrot24(%rip),%r11
+
+	pshufd	$0x00,%xmm11,%xmm8
+	pshufd	$0x55,%xmm11,%xmm9
+	movdqa	%xmm8,64(%rsp)
+	pshufd	$0xaa,%xmm11,%xmm10
+	movdqa	%xmm9,80(%rsp)
+	pshufd	$0xff,%xmm11,%xmm11
+	movdqa	%xmm10,96(%rsp)
+	movdqa	%xmm11,112(%rsp)
+
+	pshufd	$0x00,%xmm15,%xmm12
+	pshufd	$0x55,%xmm15,%xmm13
+	movdqa	%xmm12,128-256(%rcx)
+	pshufd	$0xaa,%xmm15,%xmm14
+	movdqa	%xmm13,144-256(%rcx)
+	pshufd	$0xff,%xmm15,%xmm15
+	movdqa	%xmm14,160-256(%rcx)
+	movdqa	%xmm15,176-256(%rcx)
+
+	pshufd	$0x00,%xmm7,%xmm4
+	pshufd	$0x55,%xmm7,%xmm5
+	movdqa	%xmm4,192-256(%rcx)
+	pshufd	$0xaa,%xmm7,%xmm6
+	movdqa	%xmm5,208-256(%rcx)
+	pshufd	$0xff,%xmm7,%xmm7
+	movdqa	%xmm6,224-256(%rcx)
+	movdqa	%xmm7,240-256(%rcx)
+
+	pshufd	$0x00,%xmm3,%xmm0
+	pshufd	$0x55,%xmm3,%xmm1
+	paddd	.Linc(%rip),%xmm0
+	pshufd	$0xaa,%xmm3,%xmm2
+	movdqa	%xmm1,272-256(%rcx)
+	pshufd	$0xff,%xmm3,%xmm3
+	movdqa	%xmm2,288-256(%rcx)
+	movdqa	%xmm3,304-256(%rcx)
+
+	jmp	.Loop_enter4x
+
+.align	32
+.Loop_outer4x:
+	movdqa	64(%rsp),%xmm8
+	movdqa	80(%rsp),%xmm9
+	movdqa	96(%rsp),%xmm10
+	movdqa	112(%rsp),%xmm11
+	movdqa	128-256(%rcx),%xmm12
+	movdqa	144-256(%rcx),%xmm13
+	movdqa	160-256(%rcx),%xmm14
+	movdqa	176-256(%rcx),%xmm15
+	movdqa	192-256(%rcx),%xmm4
+	movdqa	208-256(%rcx),%xmm5
+	movdqa	224-256(%rcx),%xmm6
+	movdqa	240-256(%rcx),%xmm7
+	movdqa	256-256(%rcx),%xmm0
+	movdqa	272-256(%rcx),%xmm1
+	movdqa	288-256(%rcx),%xmm2
+	movdqa	304-256(%rcx),%xmm3
+	paddd	.Lfour(%rip),%xmm0
+
+.Loop_enter4x:
+	movdqa	%xmm6,32(%rsp)
+	movdqa	%xmm7,48(%rsp)
+	movdqa	(%r9),%xmm7
+	movl	$10,%eax
+	movdqa	%xmm0,256-256(%rcx)
+	jmp	.Loop4x
+
+.align	32
+.Loop4x:
+	paddd	%xmm12,%xmm8
+	paddd	%xmm13,%xmm9
+	pxor	%xmm8,%xmm0
+	pxor	%xmm9,%xmm1
+	pshufb	%xmm7,%xmm0
+	pshufb	%xmm7,%xmm1
+	paddd	%xmm0,%xmm4
+	paddd	%xmm1,%xmm5
+	pxor	%xmm4,%xmm12
+	pxor	%xmm5,%xmm13
+	movdqa	%xmm12,%xmm6
+	pslld	$12,%xmm12
+	psrld	$20,%xmm6
+	movdqa	%xmm13,%xmm7
+	pslld	$12,%xmm13
+	por	%xmm6,%xmm12
+	psrld	$20,%xmm7
+	movdqa	(%r11),%xmm6
+	por	%xmm7,%xmm13
+	paddd	%xmm12,%xmm8
+	paddd	%xmm13,%xmm9
+	pxor	%xmm8,%xmm0
+	pxor	%xmm9,%xmm1
+	pshufb	%xmm6,%xmm0
+	pshufb	%xmm6,%xmm1
+	paddd	%xmm0,%xmm4
+	paddd	%xmm1,%xmm5
+	pxor	%xmm4,%xmm12
+	pxor	%xmm5,%xmm13
+	movdqa	%xmm12,%xmm7
+	pslld	$7,%xmm12
+	psrld	$25,%xmm7
+	movdqa	%xmm13,%xmm6
+	pslld	$7,%xmm13
+	por	%xmm7,%xmm12
+	psrld	$25,%xmm6
+	movdqa	(%r9),%xmm7
+	por	%xmm6,%xmm13
+	movdqa	%xmm4,0(%rsp)
+	movdqa	%xmm5,16(%rsp)
+	movdqa	32(%rsp),%xmm4
+	movdqa	48(%rsp),%xmm5
+	paddd	%xmm14,%xmm10
+	paddd	%xmm15,%xmm11
+	pxor	%xmm10,%xmm2
+	pxor	%xmm11,%xmm3
+	pshufb	%xmm7,%xmm2
+	pshufb	%xmm7,%xmm3
+	paddd	%xmm2,%xmm4
+	paddd	%xmm3,%xmm5
+	pxor	%xmm4,%xmm14
+	pxor	%xmm5,%xmm15
+	movdqa	%xmm14,%xmm6
+	pslld	$12,%xmm14
+	psrld	$20,%xmm6
+	movdqa	%xmm15,%xmm7
+	pslld	$12,%xmm15
+	por	%xmm6,%xmm14
+	psrld	$20,%xmm7
+	movdqa	(%r11),%xmm6
+	por	%xmm7,%xmm15
+	paddd	%xmm14,%xmm10
+	paddd	%xmm15,%xmm11
+	pxor	%xmm10,%xmm2
+	pxor	%xmm11,%xmm3
+	pshufb	%xmm6,%xmm2
+	pshufb	%xmm6,%xmm3
+	paddd	%xmm2,%xmm4
+	paddd	%xmm3,%xmm5
+	pxor	%xmm4,%xmm14
+	pxor	%xmm5,%xmm15
+	movdqa	%xmm14,%xmm7
+	pslld	$7,%xmm14
+	psrld	$25,%xmm7
+	movdqa	%xmm15,%xmm6
+	pslld	$7,%xmm15
+	por	%xmm7,%xmm14
+	psrld	$25,%xmm6
+	movdqa	(%r9),%xmm7
+	por	%xmm6,%xmm15
+	paddd	%xmm13,%xmm8
+	paddd	%xmm14,%xmm9
+	pxor	%xmm8,%xmm3
+	pxor	%xmm9,%xmm0
+	pshufb	%xmm7,%xmm3
+	pshufb	%xmm7,%xmm0
+	paddd	%xmm3,%xmm4
+	paddd	%xmm0,%xmm5
+	pxor	%xmm4,%xmm13
+	pxor	%xmm5,%xmm14
+	movdqa	%xmm13,%xmm6
+	pslld	$12,%xmm13
+	psrld	$20,%xmm6
+	movdqa	%xmm14,%xmm7
+	pslld	$12,%xmm14
+	por	%xmm6,%xmm13
+	psrld	$20,%xmm7
+	movdqa	(%r11),%xmm6
+	por	%xmm7,%xmm14
+	paddd	%xmm13,%xmm8
+	paddd	%xmm14,%xmm9
+	pxor	%xmm8,%xmm3
+	pxor	%xmm9,%xmm0
+	pshufb	%xmm6,%xmm3
+	pshufb	%xmm6,%xmm0
+	paddd	%xmm3,%xmm4
+	paddd	%xmm0,%xmm5
+	pxor	%xmm4,%xmm13
+	pxor	%xmm5,%xmm14
+	movdqa	%xmm13,%xmm7
+	pslld	$7,%xmm13
+	psrld	$25,%xmm7
+	movdqa	%xmm14,%xmm6
+	pslld	$7,%xmm14
+	por	%xmm7,%xmm13
+	psrld	$25,%xmm6
+	movdqa	(%r9),%xmm7
+	por	%xmm6,%xmm14
+	movdqa	%xmm4,32(%rsp)
+	movdqa	%xmm5,48(%rsp)
+	movdqa	0(%rsp),%xmm4
+	movdqa	16(%rsp),%xmm5
+	paddd	%xmm15,%xmm10
+	paddd	%xmm12,%xmm11
+	pxor	%xmm10,%xmm1
+	pxor	%xmm11,%xmm2
+	pshufb	%xmm7,%xmm1
+	pshufb	%xmm7,%xmm2
+	paddd	%xmm1,%xmm4
+	paddd	%xmm2,%xmm5
+	pxor	%xmm4,%xmm15
+	pxor	%xmm5,%xmm12
+	movdqa	%xmm15,%xmm6
+	pslld	$12,%xmm15
+	psrld	$20,%xmm6
+	movdqa	%xmm12,%xmm7
+	pslld	$12,%xmm12
+	por	%xmm6,%xmm15
+	psrld	$20,%xmm7
+	movdqa	(%r11),%xmm6
+	por	%xmm7,%xmm12
+	paddd	%xmm15,%xmm10
+	paddd	%xmm12,%xmm11
+	pxor	%xmm10,%xmm1
+	pxor	%xmm11,%xmm2
+	pshufb	%xmm6,%xmm1
+	pshufb	%xmm6,%xmm2
+	paddd	%xmm1,%xmm4
+	paddd	%xmm2,%xmm5
+	pxor	%xmm4,%xmm15
+	pxor	%xmm5,%xmm12
+	movdqa	%xmm15,%xmm7
+	pslld	$7,%xmm15
+	psrld	$25,%xmm7
+	movdqa	%xmm12,%xmm6
+	pslld	$7,%xmm12
+	por	%xmm7,%xmm15
+	psrld	$25,%xmm6
+	movdqa	(%r9),%xmm7
+	por	%xmm6,%xmm12
+	decl	%eax
+	jnz	.Loop4x
+
+	paddd	64(%rsp),%xmm8
+	paddd	80(%rsp),%xmm9
+	paddd	96(%rsp),%xmm10
+	paddd	112(%rsp),%xmm11
+
+	movdqa	%xmm8,%xmm6
+	punpckldq	%xmm9,%xmm8
+	movdqa	%xmm10,%xmm7
+	punpckldq	%xmm11,%xmm10
+	punpckhdq	%xmm9,%xmm6
+	punpckhdq	%xmm11,%xmm7
+	movdqa	%xmm8,%xmm9
+	punpcklqdq	%xmm10,%xmm8
+	movdqa	%xmm6,%xmm11
+	punpcklqdq	%xmm7,%xmm6
+	punpckhqdq	%xmm10,%xmm9
+	punpckhqdq	%xmm7,%xmm11
+	paddd	128-256(%rcx),%xmm12
+	paddd	144-256(%rcx),%xmm13
+	paddd	160-256(%rcx),%xmm14
+	paddd	176-256(%rcx),%xmm15
+
+	movdqa	%xmm8,0(%rsp)
+	movdqa	%xmm9,16(%rsp)
+	movdqa	32(%rsp),%xmm8
+	movdqa	48(%rsp),%xmm9
+
+	movdqa	%xmm12,%xmm10
+	punpckldq	%xmm13,%xmm12
+	movdqa	%xmm14,%xmm7
+	punpckldq	%xmm15,%xmm14
+	punpckhdq	%xmm13,%xmm10
+	punpckhdq	%xmm15,%xmm7
+	movdqa	%xmm12,%xmm13
+	punpcklqdq	%xmm14,%xmm12
+	movdqa	%xmm10,%xmm15
+	punpcklqdq	%xmm7,%xmm10
+	punpckhqdq	%xmm14,%xmm13
+	punpckhqdq	%xmm7,%xmm15
+	paddd	192-256(%rcx),%xmm4
+	paddd	208-256(%rcx),%xmm5
+	paddd	224-256(%rcx),%xmm8
+	paddd	240-256(%rcx),%xmm9
+
+	movdqa	%xmm6,32(%rsp)
+	movdqa	%xmm11,48(%rsp)
+
+	movdqa	%xmm4,%xmm14
+	punpckldq	%xmm5,%xmm4
+	movdqa	%xmm8,%xmm7
+	punpckldq	%xmm9,%xmm8
+	punpckhdq	%xmm5,%xmm14
+	punpckhdq	%xmm9,%xmm7
+	movdqa	%xmm4,%xmm5
+	punpcklqdq	%xmm8,%xmm4
+	movdqa	%xmm14,%xmm9
+	punpcklqdq	%xmm7,%xmm14
+	punpckhqdq	%xmm8,%xmm5
+	punpckhqdq	%xmm7,%xmm9
+	paddd	256-256(%rcx),%xmm0
+	paddd	272-256(%rcx),%xmm1
+	paddd	288-256(%rcx),%xmm2
+	paddd	304-256(%rcx),%xmm3
+
+	movdqa	%xmm0,%xmm8
+	punpckldq	%xmm1,%xmm0
+	movdqa	%xmm2,%xmm7
+	punpckldq	%xmm3,%xmm2
+	punpckhdq	%xmm1,%xmm8
+	punpckhdq	%xmm3,%xmm7
+	movdqa	%xmm0,%xmm1
+	punpcklqdq	%xmm2,%xmm0
+	movdqa	%xmm8,%xmm3
+	punpcklqdq	%xmm7,%xmm8
+	punpckhqdq	%xmm2,%xmm1
+	punpckhqdq	%xmm7,%xmm3
+	cmpq	$256,%rdx
+	jb	.Ltail4x
+
+	movdqu	0(%rsi),%xmm6
+	movdqu	16(%rsi),%xmm11
+	movdqu	32(%rsi),%xmm2
+	movdqu	48(%rsi),%xmm7
+	pxor	0(%rsp),%xmm6
+	pxor	%xmm12,%xmm11
+	pxor	%xmm4,%xmm2
+	pxor	%xmm0,%xmm7
+
+	movdqu	%xmm6,0(%rdi)
+	movdqu	64(%rsi),%xmm6
+	movdqu	%xmm11,16(%rdi)
+	movdqu	80(%rsi),%xmm11
+	movdqu	%xmm2,32(%rdi)
+	movdqu	96(%rsi),%xmm2
+	movdqu	%xmm7,48(%rdi)
+	movdqu	112(%rsi),%xmm7
+	leaq	128(%rsi),%rsi
+	pxor	16(%rsp),%xmm6
+	pxor	%xmm13,%xmm11
+	pxor	%xmm5,%xmm2
+	pxor	%xmm1,%xmm7
+
+	movdqu	%xmm6,64(%rdi)
+	movdqu	0(%rsi),%xmm6
+	movdqu	%xmm11,80(%rdi)
+	movdqu	16(%rsi),%xmm11
+	movdqu	%xmm2,96(%rdi)
+	movdqu	32(%rsi),%xmm2
+	movdqu	%xmm7,112(%rdi)
+	leaq	128(%rdi),%rdi
+	movdqu	48(%rsi),%xmm7
+	pxor	32(%rsp),%xmm6
+	pxor	%xmm10,%xmm11
+	pxor	%xmm14,%xmm2
+	pxor	%xmm8,%xmm7
+
+	movdqu	%xmm6,0(%rdi)
+	movdqu	64(%rsi),%xmm6
+	movdqu	%xmm11,16(%rdi)
+	movdqu	80(%rsi),%xmm11
+	movdqu	%xmm2,32(%rdi)
+	movdqu	96(%rsi),%xmm2
+	movdqu	%xmm7,48(%rdi)
+	movdqu	112(%rsi),%xmm7
+	leaq	128(%rsi),%rsi
+	pxor	48(%rsp),%xmm6
+	pxor	%xmm15,%xmm11
+	pxor	%xmm9,%xmm2
+	pxor	%xmm3,%xmm7
+	movdqu	%xmm6,64(%rdi)
+	movdqu	%xmm11,80(%rdi)
+	movdqu	%xmm2,96(%rdi)
+	movdqu	%xmm7,112(%rdi)
+	leaq	128(%rdi),%rdi
+
+	subq	$256,%rdx
+	jnz	.Loop_outer4x
+
+	jmp	.Ldone4x
+
+.Ltail4x:
+	cmpq	$192,%rdx
+	jae	.L192_or_more4x
+	cmpq	$128,%rdx
+	jae	.L128_or_more4x
+	cmpq	$64,%rdx
+	jae	.L64_or_more4x
+
+
+	xorq	%r9,%r9
+
+	movdqa	%xmm12,16(%rsp)
+	movdqa	%xmm4,32(%rsp)
+	movdqa	%xmm0,48(%rsp)
+	jmp	.Loop_tail4x
+
+.align	32
+.L64_or_more4x:
+	movdqu	0(%rsi),%xmm6
+	movdqu	16(%rsi),%xmm11
+	movdqu	32(%rsi),%xmm2
+	movdqu	48(%rsi),%xmm7
+	pxor	0(%rsp),%xmm6
+	pxor	%xmm12,%xmm11
+	pxor	%xmm4,%xmm2
+	pxor	%xmm0,%xmm7
+	movdqu	%xmm6,0(%rdi)
+	movdqu	%xmm11,16(%rdi)
+	movdqu	%xmm2,32(%rdi)
+	movdqu	%xmm7,48(%rdi)
+	je	.Ldone4x
+
+	movdqa	16(%rsp),%xmm6
+	leaq	64(%rsi),%rsi
+	xorq	%r9,%r9
+	movdqa	%xmm6,0(%rsp)
+	movdqa	%xmm13,16(%rsp)
+	leaq	64(%rdi),%rdi
+	movdqa	%xmm5,32(%rsp)
+	subq	$64,%rdx
+	movdqa	%xmm1,48(%rsp)
+	jmp	.Loop_tail4x
+
+.align	32
+.L128_or_more4x:
+	movdqu	0(%rsi),%xmm6
+	movdqu	16(%rsi),%xmm11
+	movdqu	32(%rsi),%xmm2
+	movdqu	48(%rsi),%xmm7
+	pxor	0(%rsp),%xmm6
+	pxor	%xmm12,%xmm11
+	pxor	%xmm4,%xmm2
+	pxor	%xmm0,%xmm7
+
+	movdqu	%xmm6,0(%rdi)
+	movdqu	64(%rsi),%xmm6
+	movdqu	%xmm11,16(%rdi)
+	movdqu	80(%rsi),%xmm11
+	movdqu	%xmm2,32(%rdi)
+	movdqu	96(%rsi),%xmm2
+	movdqu	%xmm7,48(%rdi)
+	movdqu	112(%rsi),%xmm7
+	pxor	16(%rsp),%xmm6
+	pxor	%xmm13,%xmm11
+	pxor	%xmm5,%xmm2
+	pxor	%xmm1,%xmm7
+	movdqu	%xmm6,64(%rdi)
+	movdqu	%xmm11,80(%rdi)
+	movdqu	%xmm2,96(%rdi)
+	movdqu	%xmm7,112(%rdi)
+	je	.Ldone4x
+
+	movdqa	32(%rsp),%xmm6
+	leaq	128(%rsi),%rsi
+	xorq	%r9,%r9
+	movdqa	%xmm6,0(%rsp)
+	movdqa	%xmm10,16(%rsp)
+	leaq	128(%rdi),%rdi
+	movdqa	%xmm14,32(%rsp)
+	subq	$128,%rdx
+	movdqa	%xmm8,48(%rsp)
+	jmp	.Loop_tail4x
+
+.align	32
+.L192_or_more4x:
+	movdqu	0(%rsi),%xmm6
+	movdqu	16(%rsi),%xmm11
+	movdqu	32(%rsi),%xmm2
+	movdqu	48(%rsi),%xmm7
+	pxor	0(%rsp),%xmm6
+	pxor	%xmm12,%xmm11
+	pxor	%xmm4,%xmm2
+	pxor	%xmm0,%xmm7
+
+	movdqu	%xmm6,0(%rdi)
+	movdqu	64(%rsi),%xmm6
+	movdqu	%xmm11,16(%rdi)
+	movdqu	80(%rsi),%xmm11
+	movdqu	%xmm2,32(%rdi)
+	movdqu	96(%rsi),%xmm2
+	movdqu	%xmm7,48(%rdi)
+	movdqu	112(%rsi),%xmm7
+	leaq	128(%rsi),%rsi
+	pxor	16(%rsp),%xmm6
+	pxor	%xmm13,%xmm11
+	pxor	%xmm5,%xmm2
+	pxor	%xmm1,%xmm7
+
+	movdqu	%xmm6,64(%rdi)
+	movdqu	0(%rsi),%xmm6
+	movdqu	%xmm11,80(%rdi)
+	movdqu	16(%rsi),%xmm11
+	movdqu	%xmm2,96(%rdi)
+	movdqu	32(%rsi),%xmm2
+	movdqu	%xmm7,112(%rdi)
+	leaq	128(%rdi),%rdi
+	movdqu	48(%rsi),%xmm7
+	pxor	32(%rsp),%xmm6
+	pxor	%xmm10,%xmm11
+	pxor	%xmm14,%xmm2
+	pxor	%xmm8,%xmm7
+	movdqu	%xmm6,0(%rdi)
+	movdqu	%xmm11,16(%rdi)
+	movdqu	%xmm2,32(%rdi)
+	movdqu	%xmm7,48(%rdi)
+	je	.Ldone4x
+
+	movdqa	48(%rsp),%xmm6
+	leaq	64(%rsi),%rsi
+	xorq	%r9,%r9
+	movdqa	%xmm6,0(%rsp)
+	movdqa	%xmm15,16(%rsp)
+	leaq	64(%rdi),%rdi
+	movdqa	%xmm9,32(%rsp)
+	subq	$192,%rdx
+	movdqa	%xmm3,48(%rsp)
+
+.Loop_tail4x:
+	movzbl	(%rsi,%r9,1),%eax
+	movzbl	(%rsp,%r9,1),%ecx
+	leaq	1(%r9),%r9
+	xorl	%ecx,%eax
+	movb	%al,-1(%rdi,%r9,1)
+	decq	%rdx
+	jnz	.Loop_tail4x
+
+.Ldone4x:
+	leaq	-8(%r10),%rsp
+
+.L4x_epilogue:
+	ret
+ENDPROC(chacha20_ssse3)
+#endif /* CONFIG_AS_SSSE3 */
+
+#ifdef CONFIG_AS_AVX2
+.align	32
+ENTRY(chacha20_avx2)
+.Lchacha20_avx2:
+	cmpq	$0,%rdx
+	je	.L8x_epilogue
+	leaq	8(%rsp),%r10
+
+	subq	$0x280+8,%rsp
+	andq	$-32,%rsp
+	vzeroupper
+
+	vbroadcasti128	.Lsigma(%rip),%ymm11
+	vbroadcasti128	(%rcx),%ymm3
+	vbroadcasti128	16(%rcx),%ymm15
+	vbroadcasti128	(%r8),%ymm7
+	leaq	256(%rsp),%rcx
+	leaq	512(%rsp),%rax
+	leaq	.Lrot16(%rip),%r9
+	leaq	.Lrot24(%rip),%r11
+
+	vpshufd	$0x00,%ymm11,%ymm8
+	vpshufd	$0x55,%ymm11,%ymm9
+	vmovdqa	%ymm8,128-256(%rcx)
+	vpshufd	$0xaa,%ymm11,%ymm10
+	vmovdqa	%ymm9,160-256(%rcx)
+	vpshufd	$0xff,%ymm11,%ymm11
+	vmovdqa	%ymm10,192-256(%rcx)
+	vmovdqa	%ymm11,224-256(%rcx)
+
+	vpshufd	$0x00,%ymm3,%ymm0
+	vpshufd	$0x55,%ymm3,%ymm1
+	vmovdqa	%ymm0,256-256(%rcx)
+	vpshufd	$0xaa,%ymm3,%ymm2
+	vmovdqa	%ymm1,288-256(%rcx)
+	vpshufd	$0xff,%ymm3,%ymm3
+	vmovdqa	%ymm2,320-256(%rcx)
+	vmovdqa	%ymm3,352-256(%rcx)
+
+	vpshufd	$0x00,%ymm15,%ymm12
+	vpshufd	$0x55,%ymm15,%ymm13
+	vmovdqa	%ymm12,384-512(%rax)
+	vpshufd	$0xaa,%ymm15,%ymm14
+	vmovdqa	%ymm13,416-512(%rax)
+	vpshufd	$0xff,%ymm15,%ymm15
+	vmovdqa	%ymm14,448-512(%rax)
+	vmovdqa	%ymm15,480-512(%rax)
+
+	vpshufd	$0x00,%ymm7,%ymm4
+	vpshufd	$0x55,%ymm7,%ymm5
+	vpaddd	.Lincy(%rip),%ymm4,%ymm4
+	vpshufd	$0xaa,%ymm7,%ymm6
+	vmovdqa	%ymm5,544-512(%rax)
+	vpshufd	$0xff,%ymm7,%ymm7
+	vmovdqa	%ymm6,576-512(%rax)
+	vmovdqa	%ymm7,608-512(%rax)
+
+	jmp	.Loop_enter8x
+
+.align	32
+.Loop_outer8x:
+	vmovdqa	128-256(%rcx),%ymm8
+	vmovdqa	160-256(%rcx),%ymm9
+	vmovdqa	192-256(%rcx),%ymm10
+	vmovdqa	224-256(%rcx),%ymm11
+	vmovdqa	256-256(%rcx),%ymm0
+	vmovdqa	288-256(%rcx),%ymm1
+	vmovdqa	320-256(%rcx),%ymm2
+	vmovdqa	352-256(%rcx),%ymm3
+	vmovdqa	384-512(%rax),%ymm12
+	vmovdqa	416-512(%rax),%ymm13
+	vmovdqa	448-512(%rax),%ymm14
+	vmovdqa	480-512(%rax),%ymm15
+	vmovdqa	512-512(%rax),%ymm4
+	vmovdqa	544-512(%rax),%ymm5
+	vmovdqa	576-512(%rax),%ymm6
+	vmovdqa	608-512(%rax),%ymm7
+	vpaddd	.Leight(%rip),%ymm4,%ymm4
+
+.Loop_enter8x:
+	vmovdqa	%ymm14,64(%rsp)
+	vmovdqa	%ymm15,96(%rsp)
+	vbroadcasti128	(%r9),%ymm15
+	vmovdqa	%ymm4,512-512(%rax)
+	movl	$10,%eax
+	jmp	.Loop8x
+
+.align	32
+.Loop8x:
+	vpaddd	%ymm0,%ymm8,%ymm8
+	vpxor	%ymm4,%ymm8,%ymm4
+	vpshufb	%ymm15,%ymm4,%ymm4
+	vpaddd	%ymm1,%ymm9,%ymm9
+	vpxor	%ymm5,%ymm9,%ymm5
+	vpshufb	%ymm15,%ymm5,%ymm5
+	vpaddd	%ymm4,%ymm12,%ymm12
+	vpxor	%ymm0,%ymm12,%ymm0
+	vpslld	$12,%ymm0,%ymm14
+	vpsrld	$20,%ymm0,%ymm0
+	vpor	%ymm0,%ymm14,%ymm0
+	vbroadcasti128	(%r11),%ymm14
+	vpaddd	%ymm5,%ymm13,%ymm13
+	vpxor	%ymm1,%ymm13,%ymm1
+	vpslld	$12,%ymm1,%ymm15
+	vpsrld	$20,%ymm1,%ymm1
+	vpor	%ymm1,%ymm15,%ymm1
+	vpaddd	%ymm0,%ymm8,%ymm8
+	vpxor	%ymm4,%ymm8,%ymm4
+	vpshufb	%ymm14,%ymm4,%ymm4
+	vpaddd	%ymm1,%ymm9,%ymm9
+	vpxor	%ymm5,%ymm9,%ymm5
+	vpshufb	%ymm14,%ymm5,%ymm5
+	vpaddd	%ymm4,%ymm12,%ymm12
+	vpxor	%ymm0,%ymm12,%ymm0
+	vpslld	$7,%ymm0,%ymm15
+	vpsrld	$25,%ymm0,%ymm0
+	vpor	%ymm0,%ymm15,%ymm0
+	vbroadcasti128	(%r9),%ymm15
+	vpaddd	%ymm5,%ymm13,%ymm13
+	vpxor	%ymm1,%ymm13,%ymm1
+	vpslld	$7,%ymm1,%ymm14
+	vpsrld	$25,%ymm1,%ymm1
+	vpor	%ymm1,%ymm14,%ymm1
+	vmovdqa	%ymm12,0(%rsp)
+	vmovdqa	%ymm13,32(%rsp)
+	vmovdqa	64(%rsp),%ymm12
+	vmovdqa	96(%rsp),%ymm13
+	vpaddd	%ymm2,%ymm10,%ymm10
+	vpxor	%ymm6,%ymm10,%ymm6
+	vpshufb	%ymm15,%ymm6,%ymm6
+	vpaddd	%ymm3,%ymm11,%ymm11
+	vpxor	%ymm7,%ymm11,%ymm7
+	vpshufb	%ymm15,%ymm7,%ymm7
+	vpaddd	%ymm6,%ymm12,%ymm12
+	vpxor	%ymm2,%ymm12,%ymm2
+	vpslld	$12,%ymm2,%ymm14
+	vpsrld	$20,%ymm2,%ymm2
+	vpor	%ymm2,%ymm14,%ymm2
+	vbroadcasti128	(%r11),%ymm14
+	vpaddd	%ymm7,%ymm13,%ymm13
+	vpxor	%ymm3,%ymm13,%ymm3
+	vpslld	$12,%ymm3,%ymm15
+	vpsrld	$20,%ymm3,%ymm3
+	vpor	%ymm3,%ymm15,%ymm3
+	vpaddd	%ymm2,%ymm10,%ymm10
+	vpxor	%ymm6,%ymm10,%ymm6
+	vpshufb	%ymm14,%ymm6,%ymm6
+	vpaddd	%ymm3,%ymm11,%ymm11
+	vpxor	%ymm7,%ymm11,%ymm7
+	vpshufb	%ymm14,%ymm7,%ymm7
+	vpaddd	%ymm6,%ymm12,%ymm12
+	vpxor	%ymm2,%ymm12,%ymm2
+	vpslld	$7,%ymm2,%ymm15
+	vpsrld	$25,%ymm2,%ymm2
+	vpor	%ymm2,%ymm15,%ymm2
+	vbroadcasti128	(%r9),%ymm15
+	vpaddd	%ymm7,%ymm13,%ymm13
+	vpxor	%ymm3,%ymm13,%ymm3
+	vpslld	$7,%ymm3,%ymm14
+	vpsrld	$25,%ymm3,%ymm3
+	vpor	%ymm3,%ymm14,%ymm3
+	vpaddd	%ymm1,%ymm8,%ymm8
+	vpxor	%ymm7,%ymm8,%ymm7
+	vpshufb	%ymm15,%ymm7,%ymm7
+	vpaddd	%ymm2,%ymm9,%ymm9
+	vpxor	%ymm4,%ymm9,%ymm4
+	vpshufb	%ymm15,%ymm4,%ymm4
+	vpaddd	%ymm7,%ymm12,%ymm12
+	vpxor	%ymm1,%ymm12,%ymm1
+	vpslld	$12,%ymm1,%ymm14
+	vpsrld	$20,%ymm1,%ymm1
+	vpor	%ymm1,%ymm14,%ymm1
+	vbroadcasti128	(%r11),%ymm14
+	vpaddd	%ymm4,%ymm13,%ymm13
+	vpxor	%ymm2,%ymm13,%ymm2
+	vpslld	$12,%ymm2,%ymm15
+	vpsrld	$20,%ymm2,%ymm2
+	vpor	%ymm2,%ymm15,%ymm2
+	vpaddd	%ymm1,%ymm8,%ymm8
+	vpxor	%ymm7,%ymm8,%ymm7
+	vpshufb	%ymm14,%ymm7,%ymm7
+	vpaddd	%ymm2,%ymm9,%ymm9
+	vpxor	%ymm4,%ymm9,%ymm4
+	vpshufb	%ymm14,%ymm4,%ymm4
+	vpaddd	%ymm7,%ymm12,%ymm12
+	vpxor	%ymm1,%ymm12,%ymm1
+	vpslld	$7,%ymm1,%ymm15
+	vpsrld	$25,%ymm1,%ymm1
+	vpor	%ymm1,%ymm15,%ymm1
+	vbroadcasti128	(%r9),%ymm15
+	vpaddd	%ymm4,%ymm13,%ymm13
+	vpxor	%ymm2,%ymm13,%ymm2
+	vpslld	$7,%ymm2,%ymm14
+	vpsrld	$25,%ymm2,%ymm2
+	vpor	%ymm2,%ymm14,%ymm2
+	vmovdqa	%ymm12,64(%rsp)
+	vmovdqa	%ymm13,96(%rsp)
+	vmovdqa	0(%rsp),%ymm12
+	vmovdqa	32(%rsp),%ymm13
+	vpaddd	%ymm3,%ymm10,%ymm10
+	vpxor	%ymm5,%ymm10,%ymm5
+	vpshufb	%ymm15,%ymm5,%ymm5
+	vpaddd	%ymm0,%ymm11,%ymm11
+	vpxor	%ymm6,%ymm11,%ymm6
+	vpshufb	%ymm15,%ymm6,%ymm6
+	vpaddd	%ymm5,%ymm12,%ymm12
+	vpxor	%ymm3,%ymm12,%ymm3
+	vpslld	$12,%ymm3,%ymm14
+	vpsrld	$20,%ymm3,%ymm3
+	vpor	%ymm3,%ymm14,%ymm3
+	vbroadcasti128	(%r11),%ymm14
+	vpaddd	%ymm6,%ymm13,%ymm13
+	vpxor	%ymm0,%ymm13,%ymm0
+	vpslld	$12,%ymm0,%ymm15
+	vpsrld	$20,%ymm0,%ymm0
+	vpor	%ymm0,%ymm15,%ymm0
+	vpaddd	%ymm3,%ymm10,%ymm10
+	vpxor	%ymm5,%ymm10,%ymm5
+	vpshufb	%ymm14,%ymm5,%ymm5
+	vpaddd	%ymm0,%ymm11,%ymm11
+	vpxor	%ymm6,%ymm11,%ymm6
+	vpshufb	%ymm14,%ymm6,%ymm6
+	vpaddd	%ymm5,%ymm12,%ymm12
+	vpxor	%ymm3,%ymm12,%ymm3
+	vpslld	$7,%ymm3,%ymm15
+	vpsrld	$25,%ymm3,%ymm3
+	vpor	%ymm3,%ymm15,%ymm3
+	vbroadcasti128	(%r9),%ymm15
+	vpaddd	%ymm6,%ymm13,%ymm13
+	vpxor	%ymm0,%ymm13,%ymm0
+	vpslld	$7,%ymm0,%ymm14
+	vpsrld	$25,%ymm0,%ymm0
+	vpor	%ymm0,%ymm14,%ymm0
+	decl	%eax
+	jnz	.Loop8x
+
+	leaq	512(%rsp),%rax
+	vpaddd	128-256(%rcx),%ymm8,%ymm8
+	vpaddd	160-256(%rcx),%ymm9,%ymm9
+	vpaddd	192-256(%rcx),%ymm10,%ymm10
+	vpaddd	224-256(%rcx),%ymm11,%ymm11
+
+	vpunpckldq	%ymm9,%ymm8,%ymm14
+	vpunpckldq	%ymm11,%ymm10,%ymm15
+	vpunpckhdq	%ymm9,%ymm8,%ymm8
+	vpunpckhdq	%ymm11,%ymm10,%ymm10
+	vpunpcklqdq	%ymm15,%ymm14,%ymm9
+	vpunpckhqdq	%ymm15,%ymm14,%ymm14
+	vpunpcklqdq	%ymm10,%ymm8,%ymm11
+	vpunpckhqdq	%ymm10,%ymm8,%ymm8
+	vpaddd	256-256(%rcx),%ymm0,%ymm0
+	vpaddd	288-256(%rcx),%ymm1,%ymm1
+	vpaddd	320-256(%rcx),%ymm2,%ymm2
+	vpaddd	352-256(%rcx),%ymm3,%ymm3
+
+	vpunpckldq	%ymm1,%ymm0,%ymm10
+	vpunpckldq	%ymm3,%ymm2,%ymm15
+	vpunpckhdq	%ymm1,%ymm0,%ymm0
+	vpunpckhdq	%ymm3,%ymm2,%ymm2
+	vpunpcklqdq	%ymm15,%ymm10,%ymm1
+	vpunpckhqdq	%ymm15,%ymm10,%ymm10
+	vpunpcklqdq	%ymm2,%ymm0,%ymm3
+	vpunpckhqdq	%ymm2,%ymm0,%ymm0
+	vperm2i128	$0x20,%ymm1,%ymm9,%ymm15
+	vperm2i128	$0x31,%ymm1,%ymm9,%ymm1
+	vperm2i128	$0x20,%ymm10,%ymm14,%ymm9
+	vperm2i128	$0x31,%ymm10,%ymm14,%ymm10
+	vperm2i128	$0x20,%ymm3,%ymm11,%ymm14
+	vperm2i128	$0x31,%ymm3,%ymm11,%ymm3
+	vperm2i128	$0x20,%ymm0,%ymm8,%ymm11
+	vperm2i128	$0x31,%ymm0,%ymm8,%ymm0
+	vmovdqa	%ymm15,0(%rsp)
+	vmovdqa	%ymm9,32(%rsp)
+	vmovdqa	64(%rsp),%ymm15
+	vmovdqa	96(%rsp),%ymm9
+
+	vpaddd	384-512(%rax),%ymm12,%ymm12
+	vpaddd	416-512(%rax),%ymm13,%ymm13
+	vpaddd	448-512(%rax),%ymm15,%ymm15
+	vpaddd	480-512(%rax),%ymm9,%ymm9
+
+	vpunpckldq	%ymm13,%ymm12,%ymm2
+	vpunpckldq	%ymm9,%ymm15,%ymm8
+	vpunpckhdq	%ymm13,%ymm12,%ymm12
+	vpunpckhdq	%ymm9,%ymm15,%ymm15
+	vpunpcklqdq	%ymm8,%ymm2,%ymm13
+	vpunpckhqdq	%ymm8,%ymm2,%ymm2
+	vpunpcklqdq	%ymm15,%ymm12,%ymm9
+	vpunpckhqdq	%ymm15,%ymm12,%ymm12
+	vpaddd	512-512(%rax),%ymm4,%ymm4
+	vpaddd	544-512(%rax),%ymm5,%ymm5
+	vpaddd	576-512(%rax),%ymm6,%ymm6
+	vpaddd	608-512(%rax),%ymm7,%ymm7
+
+	vpunpckldq	%ymm5,%ymm4,%ymm15
+	vpunpckldq	%ymm7,%ymm6,%ymm8
+	vpunpckhdq	%ymm5,%ymm4,%ymm4
+	vpunpckhdq	%ymm7,%ymm6,%ymm6
+	vpunpcklqdq	%ymm8,%ymm15,%ymm5
+	vpunpckhqdq	%ymm8,%ymm15,%ymm15
+	vpunpcklqdq	%ymm6,%ymm4,%ymm7
+	vpunpckhqdq	%ymm6,%ymm4,%ymm4
+	vperm2i128	$0x20,%ymm5,%ymm13,%ymm8
+	vperm2i128	$0x31,%ymm5,%ymm13,%ymm5
+	vperm2i128	$0x20,%ymm15,%ymm2,%ymm13
+	vperm2i128	$0x31,%ymm15,%ymm2,%ymm15
+	vperm2i128	$0x20,%ymm7,%ymm9,%ymm2
+	vperm2i128	$0x31,%ymm7,%ymm9,%ymm7
+	vperm2i128	$0x20,%ymm4,%ymm12,%ymm9
+	vperm2i128	$0x31,%ymm4,%ymm12,%ymm4
+	vmovdqa	0(%rsp),%ymm6
+	vmovdqa	32(%rsp),%ymm12
+
+	cmpq	$512,%rdx
+	jb	.Ltail8x
+
+	vpxor	0(%rsi),%ymm6,%ymm6
+	vpxor	32(%rsi),%ymm8,%ymm8
+	vpxor	64(%rsi),%ymm1,%ymm1
+	vpxor	96(%rsi),%ymm5,%ymm5
+	leaq	128(%rsi),%rsi
+	vmovdqu	%ymm6,0(%rdi)
+	vmovdqu	%ymm8,32(%rdi)
+	vmovdqu	%ymm1,64(%rdi)
+	vmovdqu	%ymm5,96(%rdi)
+	leaq	128(%rdi),%rdi
+
+	vpxor	0(%rsi),%ymm12,%ymm12
+	vpxor	32(%rsi),%ymm13,%ymm13
+	vpxor	64(%rsi),%ymm10,%ymm10
+	vpxor	96(%rsi),%ymm15,%ymm15
+	leaq	128(%rsi),%rsi
+	vmovdqu	%ymm12,0(%rdi)
+	vmovdqu	%ymm13,32(%rdi)
+	vmovdqu	%ymm10,64(%rdi)
+	vmovdqu	%ymm15,96(%rdi)
+	leaq	128(%rdi),%rdi
+
+	vpxor	0(%rsi),%ymm14,%ymm14
+	vpxor	32(%rsi),%ymm2,%ymm2
+	vpxor	64(%rsi),%ymm3,%ymm3
+	vpxor	96(%rsi),%ymm7,%ymm7
+	leaq	128(%rsi),%rsi
+	vmovdqu	%ymm14,0(%rdi)
+	vmovdqu	%ymm2,32(%rdi)
+	vmovdqu	%ymm3,64(%rdi)
+	vmovdqu	%ymm7,96(%rdi)
+	leaq	128(%rdi),%rdi
+
+	vpxor	0(%rsi),%ymm11,%ymm11
+	vpxor	32(%rsi),%ymm9,%ymm9
+	vpxor	64(%rsi),%ymm0,%ymm0
+	vpxor	96(%rsi),%ymm4,%ymm4
+	leaq	128(%rsi),%rsi
+	vmovdqu	%ymm11,0(%rdi)
+	vmovdqu	%ymm9,32(%rdi)
+	vmovdqu	%ymm0,64(%rdi)
+	vmovdqu	%ymm4,96(%rdi)
+	leaq	128(%rdi),%rdi
+
+	subq	$512,%rdx
+	jnz	.Loop_outer8x
+
+	jmp	.Ldone8x
+
+.Ltail8x:
+	cmpq	$448,%rdx
+	jae	.L448_or_more8x
+	cmpq	$384,%rdx
+	jae	.L384_or_more8x
+	cmpq	$320,%rdx
+	jae	.L320_or_more8x
+	cmpq	$256,%rdx
+	jae	.L256_or_more8x
+	cmpq	$192,%rdx
+	jae	.L192_or_more8x
+	cmpq	$128,%rdx
+	jae	.L128_or_more8x
+	cmpq	$64,%rdx
+	jae	.L64_or_more8x
+
+	xorq	%r9,%r9
+	vmovdqa	%ymm6,0(%rsp)
+	vmovdqa	%ymm8,32(%rsp)
+	jmp	.Loop_tail8x
+
+.align	32
+.L64_or_more8x:
+	vpxor	0(%rsi),%ymm6,%ymm6
+	vpxor	32(%rsi),%ymm8,%ymm8
+	vmovdqu	%ymm6,0(%rdi)
+	vmovdqu	%ymm8,32(%rdi)
+	je	.Ldone8x
+
+	leaq	64(%rsi),%rsi
+	xorq	%r9,%r9
+	vmovdqa	%ymm1,0(%rsp)
+	leaq	64(%rdi),%rdi
+	subq	$64,%rdx
+	vmovdqa	%ymm5,32(%rsp)
+	jmp	.Loop_tail8x
+
+.align	32
+.L128_or_more8x:
+	vpxor	0(%rsi),%ymm6,%ymm6
+	vpxor	32(%rsi),%ymm8,%ymm8
+	vpxor	64(%rsi),%ymm1,%ymm1
+	vpxor	96(%rsi),%ymm5,%ymm5
+	vmovdqu	%ymm6,0(%rdi)
+	vmovdqu	%ymm8,32(%rdi)
+	vmovdqu	%ymm1,64(%rdi)
+	vmovdqu	%ymm5,96(%rdi)
+	je	.Ldone8x
+
+	leaq	128(%rsi),%rsi
+	xorq	%r9,%r9
+	vmovdqa	%ymm12,0(%rsp)
+	leaq	128(%rdi),%rdi
+	subq	$128,%rdx
+	vmovdqa	%ymm13,32(%rsp)
+	jmp	.Loop_tail8x
+
+.align	32
+.L192_or_more8x:
+	vpxor	0(%rsi),%ymm6,%ymm6
+	vpxor	32(%rsi),%ymm8,%ymm8
+	vpxor	64(%rsi),%ymm1,%ymm1
+	vpxor	96(%rsi),%ymm5,%ymm5
+	vpxor	128(%rsi),%ymm12,%ymm12
+	vpxor	160(%rsi),%ymm13,%ymm13
+	vmovdqu	%ymm6,0(%rdi)
+	vmovdqu	%ymm8,32(%rdi)
+	vmovdqu	%ymm1,64(%rdi)
+	vmovdqu	%ymm5,96(%rdi)
+	vmovdqu	%ymm12,128(%rdi)
+	vmovdqu	%ymm13,160(%rdi)
+	je	.Ldone8x
+
+	leaq	192(%rsi),%rsi
+	xorq	%r9,%r9
+	vmovdqa	%ymm10,0(%rsp)
+	leaq	192(%rdi),%rdi
+	subq	$192,%rdx
+	vmovdqa	%ymm15,32(%rsp)
+	jmp	.Loop_tail8x
+
+.align	32
+.L256_or_more8x:
+	vpxor	0(%rsi),%ymm6,%ymm6
+	vpxor	32(%rsi),%ymm8,%ymm8
+	vpxor	64(%rsi),%ymm1,%ymm1
+	vpxor	96(%rsi),%ymm5,%ymm5
+	vpxor	128(%rsi),%ymm12,%ymm12
+	vpxor	160(%rsi),%ymm13,%ymm13
+	vpxor	192(%rsi),%ymm10,%ymm10
+	vpxor	224(%rsi),%ymm15,%ymm15
+	vmovdqu	%ymm6,0(%rdi)
+	vmovdqu	%ymm8,32(%rdi)
+	vmovdqu	%ymm1,64(%rdi)
+	vmovdqu	%ymm5,96(%rdi)
+	vmovdqu	%ymm12,128(%rdi)
+	vmovdqu	%ymm13,160(%rdi)
+	vmovdqu	%ymm10,192(%rdi)
+	vmovdqu	%ymm15,224(%rdi)
+	je	.Ldone8x
+
+	leaq	256(%rsi),%rsi
+	xorq	%r9,%r9
+	vmovdqa	%ymm14,0(%rsp)
+	leaq	256(%rdi),%rdi
+	subq	$256,%rdx
+	vmovdqa	%ymm2,32(%rsp)
+	jmp	.Loop_tail8x
+
+.align	32
+.L320_or_more8x:
+	vpxor	0(%rsi),%ymm6,%ymm6
+	vpxor	32(%rsi),%ymm8,%ymm8
+	vpxor	64(%rsi),%ymm1,%ymm1
+	vpxor	96(%rsi),%ymm5,%ymm5
+	vpxor	128(%rsi),%ymm12,%ymm12
+	vpxor	160(%rsi),%ymm13,%ymm13
+	vpxor	192(%rsi),%ymm10,%ymm10
+	vpxor	224(%rsi),%ymm15,%ymm15
+	vpxor	256(%rsi),%ymm14,%ymm14
+	vpxor	288(%rsi),%ymm2,%ymm2
+	vmovdqu	%ymm6,0(%rdi)
+	vmovdqu	%ymm8,32(%rdi)
+	vmovdqu	%ymm1,64(%rdi)
+	vmovdqu	%ymm5,96(%rdi)
+	vmovdqu	%ymm12,128(%rdi)
+	vmovdqu	%ymm13,160(%rdi)
+	vmovdqu	%ymm10,192(%rdi)
+	vmovdqu	%ymm15,224(%rdi)
+	vmovdqu	%ymm14,256(%rdi)
+	vmovdqu	%ymm2,288(%rdi)
+	je	.Ldone8x
+
+	leaq	320(%rsi),%rsi
+	xorq	%r9,%r9
+	vmovdqa	%ymm3,0(%rsp)
+	leaq	320(%rdi),%rdi
+	subq	$320,%rdx
+	vmovdqa	%ymm7,32(%rsp)
+	jmp	.Loop_tail8x
+
+.align	32
+.L384_or_more8x:
+	vpxor	0(%rsi),%ymm6,%ymm6
+	vpxor	32(%rsi),%ymm8,%ymm8
+	vpxor	64(%rsi),%ymm1,%ymm1
+	vpxor	96(%rsi),%ymm5,%ymm5
+	vpxor	128(%rsi),%ymm12,%ymm12
+	vpxor	160(%rsi),%ymm13,%ymm13
+	vpxor	192(%rsi),%ymm10,%ymm10
+	vpxor	224(%rsi),%ymm15,%ymm15
+	vpxor	256(%rsi),%ymm14,%ymm14
+	vpxor	288(%rsi),%ymm2,%ymm2
+	vpxor	320(%rsi),%ymm3,%ymm3
+	vpxor	352(%rsi),%ymm7,%ymm7
+	vmovdqu	%ymm6,0(%rdi)
+	vmovdqu	%ymm8,32(%rdi)
+	vmovdqu	%ymm1,64(%rdi)
+	vmovdqu	%ymm5,96(%rdi)
+	vmovdqu	%ymm12,128(%rdi)
+	vmovdqu	%ymm13,160(%rdi)
+	vmovdqu	%ymm10,192(%rdi)
+	vmovdqu	%ymm15,224(%rdi)
+	vmovdqu	%ymm14,256(%rdi)
+	vmovdqu	%ymm2,288(%rdi)
+	vmovdqu	%ymm3,320(%rdi)
+	vmovdqu	%ymm7,352(%rdi)
+	je	.Ldone8x
+
+	leaq	384(%rsi),%rsi
+	xorq	%r9,%r9
+	vmovdqa	%ymm11,0(%rsp)
+	leaq	384(%rdi),%rdi
+	subq	$384,%rdx
+	vmovdqa	%ymm9,32(%rsp)
+	jmp	.Loop_tail8x
+
+.align	32
+.L448_or_more8x:
+	vpxor	0(%rsi),%ymm6,%ymm6
+	vpxor	32(%rsi),%ymm8,%ymm8
+	vpxor	64(%rsi),%ymm1,%ymm1
+	vpxor	96(%rsi),%ymm5,%ymm5
+	vpxor	128(%rsi),%ymm12,%ymm12
+	vpxor	160(%rsi),%ymm13,%ymm13
+	vpxor	192(%rsi),%ymm10,%ymm10
+	vpxor	224(%rsi),%ymm15,%ymm15
+	vpxor	256(%rsi),%ymm14,%ymm14
+	vpxor	288(%rsi),%ymm2,%ymm2
+	vpxor	320(%rsi),%ymm3,%ymm3
+	vpxor	352(%rsi),%ymm7,%ymm7
+	vpxor	384(%rsi),%ymm11,%ymm11
+	vpxor	416(%rsi),%ymm9,%ymm9
+	vmovdqu	%ymm6,0(%rdi)
+	vmovdqu	%ymm8,32(%rdi)
+	vmovdqu	%ymm1,64(%rdi)
+	vmovdqu	%ymm5,96(%rdi)
+	vmovdqu	%ymm12,128(%rdi)
+	vmovdqu	%ymm13,160(%rdi)
+	vmovdqu	%ymm10,192(%rdi)
+	vmovdqu	%ymm15,224(%rdi)
+	vmovdqu	%ymm14,256(%rdi)
+	vmovdqu	%ymm2,288(%rdi)
+	vmovdqu	%ymm3,320(%rdi)
+	vmovdqu	%ymm7,352(%rdi)
+	vmovdqu	%ymm11,384(%rdi)
+	vmovdqu	%ymm9,416(%rdi)
+	je	.Ldone8x
+
+	leaq	448(%rsi),%rsi
+	xorq	%r9,%r9
+	vmovdqa	%ymm0,0(%rsp)
+	leaq	448(%rdi),%rdi
+	subq	$448,%rdx
+	vmovdqa	%ymm4,32(%rsp)
+
+.Loop_tail8x:
+	movzbl	(%rsi,%r9,1),%eax
+	movzbl	(%rsp,%r9,1),%ecx
+	leaq	1(%r9),%r9
+	xorl	%ecx,%eax
+	movb	%al,-1(%rdi,%r9,1)
+	decq	%rdx
+	jnz	.Loop_tail8x
+
+.Ldone8x:
+	vzeroall
+	leaq	-8(%r10),%rsp
+
+.L8x_epilogue:
+	ret
+ENDPROC(chacha20_avx2)
+#endif /* CONFIG_AS_AVX2 */
+
+#ifdef CONFIG_AS_AVX512
+.align	32
+ENTRY(chacha20_avx512)
+.Lchacha20_avx512:
+	cmpq	$0,%rdx
+	je	.Lavx512_epilogue
+	leaq	8(%rsp),%r10
+
+	cmpq	$512,%rdx
+	ja	.Lchacha20_16x
+
+	subq	$64+8,%rsp
+	andq	$-64,%rsp
+	vbroadcasti32x4	.Lsigma(%rip),%zmm0
+	vbroadcasti32x4	(%rcx),%zmm1
+	vbroadcasti32x4	16(%rcx),%zmm2
+	vbroadcasti32x4	(%r8),%zmm3
+
+	vmovdqa32	%zmm0,%zmm16
+	vmovdqa32	%zmm1,%zmm17
+	vmovdqa32	%zmm2,%zmm18
+	vpaddd	.Lzeroz(%rip),%zmm3,%zmm3
+	vmovdqa32	.Lfourz(%rip),%zmm20
+	movq	$10,%r8
+	vmovdqa32	%zmm3,%zmm19
+	jmp	.Loop_avx512
+
+.align	16
+.Loop_outer_avx512:
+	vmovdqa32	%zmm16,%zmm0
+	vmovdqa32	%zmm17,%zmm1
+	vmovdqa32	%zmm18,%zmm2
+	vpaddd	%zmm20,%zmm19,%zmm3
+	movq	$10,%r8
+	vmovdqa32	%zmm3,%zmm19
+	jmp	.Loop_avx512
+
+.align	32
+.Loop_avx512:
+	vpaddd	%zmm1,%zmm0,%zmm0
+	vpxord	%zmm0,%zmm3,%zmm3
+	vprold	$16,%zmm3,%zmm3
+	vpaddd	%zmm3,%zmm2,%zmm2
+	vpxord	%zmm2,%zmm1,%zmm1
+	vprold	$12,%zmm1,%zmm1
+	vpaddd	%zmm1,%zmm0,%zmm0
+	vpxord	%zmm0,%zmm3,%zmm3
+	vprold	$8,%zmm3,%zmm3
+	vpaddd	%zmm3,%zmm2,%zmm2
+	vpxord	%zmm2,%zmm1,%zmm1
+	vprold	$7,%zmm1,%zmm1
+	vpshufd	$78,%zmm2,%zmm2
+	vpshufd	$57,%zmm1,%zmm1
+	vpshufd	$147,%zmm3,%zmm3
+	vpaddd	%zmm1,%zmm0,%zmm0
+	vpxord	%zmm0,%zmm3,%zmm3
+	vprold	$16,%zmm3,%zmm3
+	vpaddd	%zmm3,%zmm2,%zmm2
+	vpxord	%zmm2,%zmm1,%zmm1
+	vprold	$12,%zmm1,%zmm1
+	vpaddd	%zmm1,%zmm0,%zmm0
+	vpxord	%zmm0,%zmm3,%zmm3
+	vprold	$8,%zmm3,%zmm3
+	vpaddd	%zmm3,%zmm2,%zmm2
+	vpxord	%zmm2,%zmm1,%zmm1
+	vprold	$7,%zmm1,%zmm1
+	vpshufd	$78,%zmm2,%zmm2
+	vpshufd	$147,%zmm1,%zmm1
+	vpshufd	$57,%zmm3,%zmm3
+	decq	%r8
+	jnz	.Loop_avx512
+	vpaddd	%zmm16,%zmm0,%zmm0
+	vpaddd	%zmm17,%zmm1,%zmm1
+	vpaddd	%zmm18,%zmm2,%zmm2
+	vpaddd	%zmm19,%zmm3,%zmm3
+
+	subq	$64,%rdx
+	jb	.Ltail64_avx512
+
+	vpxor	0(%rsi),%xmm0,%xmm4
+	vpxor	16(%rsi),%xmm1,%xmm5
+	vpxor	32(%rsi),%xmm2,%xmm6
+	vpxor	48(%rsi),%xmm3,%xmm7
+	leaq	64(%rsi),%rsi
+
+	vmovdqu	%xmm4,0(%rdi)
+	vmovdqu	%xmm5,16(%rdi)
+	vmovdqu	%xmm6,32(%rdi)
+	vmovdqu	%xmm7,48(%rdi)
+	leaq	64(%rdi),%rdi
+
+	jz	.Ldone_avx512
+
+	vextracti32x4	$1,%zmm0,%xmm4
+	vextracti32x4	$1,%zmm1,%xmm5
+	vextracti32x4	$1,%zmm2,%xmm6
+	vextracti32x4	$1,%zmm3,%xmm7
+
+	subq	$64,%rdx
+	jb	.Ltail_avx512
+
+	vpxor	0(%rsi),%xmm4,%xmm4
+	vpxor	16(%rsi),%xmm5,%xmm5
+	vpxor	32(%rsi),%xmm6,%xmm6
+	vpxor	48(%rsi),%xmm7,%xmm7
+	leaq	64(%rsi),%rsi
+
+	vmovdqu	%xmm4,0(%rdi)
+	vmovdqu	%xmm5,16(%rdi)
+	vmovdqu	%xmm6,32(%rdi)
+	vmovdqu	%xmm7,48(%rdi)
+	leaq	64(%rdi),%rdi
+
+	jz	.Ldone_avx512
+
+	vextracti32x4	$2,%zmm0,%xmm4
+	vextracti32x4	$2,%zmm1,%xmm5
+	vextracti32x4	$2,%zmm2,%xmm6
+	vextracti32x4	$2,%zmm3,%xmm7
+
+	subq	$64,%rdx
+	jb	.Ltail_avx512
+
+	vpxor	0(%rsi),%xmm4,%xmm4
+	vpxor	16(%rsi),%xmm5,%xmm5
+	vpxor	32(%rsi),%xmm6,%xmm6
+	vpxor	48(%rsi),%xmm7,%xmm7
+	leaq	64(%rsi),%rsi
+
+	vmovdqu	%xmm4,0(%rdi)
+	vmovdqu	%xmm5,16(%rdi)
+	vmovdqu	%xmm6,32(%rdi)
+	vmovdqu	%xmm7,48(%rdi)
+	leaq	64(%rdi),%rdi
+
+	jz	.Ldone_avx512
+
+	vextracti32x4	$3,%zmm0,%xmm4
+	vextracti32x4	$3,%zmm1,%xmm5
+	vextracti32x4	$3,%zmm2,%xmm6
+	vextracti32x4	$3,%zmm3,%xmm7
+
+	subq	$64,%rdx
+	jb	.Ltail_avx512
+
+	vpxor	0(%rsi),%xmm4,%xmm4
+	vpxor	16(%rsi),%xmm5,%xmm5
+	vpxor	32(%rsi),%xmm6,%xmm6
+	vpxor	48(%rsi),%xmm7,%xmm7
+	leaq	64(%rsi),%rsi
+
+	vmovdqu	%xmm4,0(%rdi)
+	vmovdqu	%xmm5,16(%rdi)
+	vmovdqu	%xmm6,32(%rdi)
+	vmovdqu	%xmm7,48(%rdi)
+	leaq	64(%rdi),%rdi
+
+	jnz	.Loop_outer_avx512
+
+	jmp	.Ldone_avx512
+
+.align	16
+.Ltail64_avx512:
+	vmovdqa	%xmm0,0(%rsp)
+	vmovdqa	%xmm1,16(%rsp)
+	vmovdqa	%xmm2,32(%rsp)
+	vmovdqa	%xmm3,48(%rsp)
+	addq	$64,%rdx
+	jmp	.Loop_tail_avx512
+
+.align	16
+.Ltail_avx512:
+	vmovdqa	%xmm4,0(%rsp)
+	vmovdqa	%xmm5,16(%rsp)
+	vmovdqa	%xmm6,32(%rsp)
+	vmovdqa	%xmm7,48(%rsp)
+	addq	$64,%rdx
+
+.Loop_tail_avx512:
+	movzbl	(%rsi,%r8,1),%eax
+	movzbl	(%rsp,%r8,1),%ecx
+	leaq	1(%r8),%r8
+	xorl	%ecx,%eax
+	movb	%al,-1(%rdi,%r8,1)
+	decq	%rdx
+	jnz	.Loop_tail_avx512
+
+	vmovdqa32	%zmm16,0(%rsp)
+
+.Ldone_avx512:
+	vzeroall
+	leaq	-8(%r10),%rsp
+
+.Lavx512_epilogue:
+	ret
+
+.align	32
+.Lchacha20_16x:
+	leaq	8(%rsp),%r10
+
+	subq	$64+8,%rsp
+	andq	$-64,%rsp
+	vzeroupper
+
+	leaq	.Lsigma(%rip),%r9
+	vbroadcasti32x4	(%r9),%zmm3
+	vbroadcasti32x4	(%rcx),%zmm7
+	vbroadcasti32x4	16(%rcx),%zmm11
+	vbroadcasti32x4	(%r8),%zmm15
+
+	vpshufd	$0x00,%zmm3,%zmm0
+	vpshufd	$0x55,%zmm3,%zmm1
+	vpshufd	$0xaa,%zmm3,%zmm2
+	vpshufd	$0xff,%zmm3,%zmm3
+	vmovdqa64	%zmm0,%zmm16
+	vmovdqa64	%zmm1,%zmm17
+	vmovdqa64	%zmm2,%zmm18
+	vmovdqa64	%zmm3,%zmm19
+
+	vpshufd	$0x00,%zmm7,%zmm4
+	vpshufd	$0x55,%zmm7,%zmm5
+	vpshufd	$0xaa,%zmm7,%zmm6
+	vpshufd	$0xff,%zmm7,%zmm7
+	vmovdqa64	%zmm4,%zmm20
+	vmovdqa64	%zmm5,%zmm21
+	vmovdqa64	%zmm6,%zmm22
+	vmovdqa64	%zmm7,%zmm23
+
+	vpshufd	$0x00,%zmm11,%zmm8
+	vpshufd	$0x55,%zmm11,%zmm9
+	vpshufd	$0xaa,%zmm11,%zmm10
+	vpshufd	$0xff,%zmm11,%zmm11
+	vmovdqa64	%zmm8,%zmm24
+	vmovdqa64	%zmm9,%zmm25
+	vmovdqa64	%zmm10,%zmm26
+	vmovdqa64	%zmm11,%zmm27
+
+	vpshufd	$0x00,%zmm15,%zmm12
+	vpshufd	$0x55,%zmm15,%zmm13
+	vpshufd	$0xaa,%zmm15,%zmm14
+	vpshufd	$0xff,%zmm15,%zmm15
+	vpaddd	.Lincz(%rip),%zmm12,%zmm12
+	vmovdqa64	%zmm12,%zmm28
+	vmovdqa64	%zmm13,%zmm29
+	vmovdqa64	%zmm14,%zmm30
+	vmovdqa64	%zmm15,%zmm31
+
+	movl	$10,%eax
+	jmp	.Loop16x
+
+.align	32
+.Loop_outer16x:
+	vpbroadcastd	0(%r9),%zmm0
+	vpbroadcastd	4(%r9),%zmm1
+	vpbroadcastd	8(%r9),%zmm2
+	vpbroadcastd	12(%r9),%zmm3
+	vpaddd	.Lsixteen(%rip),%zmm28,%zmm28
+	vmovdqa64	%zmm20,%zmm4
+	vmovdqa64	%zmm21,%zmm5
+	vmovdqa64	%zmm22,%zmm6
+	vmovdqa64	%zmm23,%zmm7
+	vmovdqa64	%zmm24,%zmm8
+	vmovdqa64	%zmm25,%zmm9
+	vmovdqa64	%zmm26,%zmm10
+	vmovdqa64	%zmm27,%zmm11
+	vmovdqa64	%zmm28,%zmm12
+	vmovdqa64	%zmm29,%zmm13
+	vmovdqa64	%zmm30,%zmm14
+	vmovdqa64	%zmm31,%zmm15
+
+	vmovdqa64	%zmm0,%zmm16
+	vmovdqa64	%zmm1,%zmm17
+	vmovdqa64	%zmm2,%zmm18
+	vmovdqa64	%zmm3,%zmm19
+
+	movl	$10,%eax
+	jmp	.Loop16x
+
+.align	32
+.Loop16x:
+	vpaddd	%zmm4,%zmm0,%zmm0
+	vpaddd	%zmm5,%zmm1,%zmm1
+	vpaddd	%zmm6,%zmm2,%zmm2
+	vpaddd	%zmm7,%zmm3,%zmm3
+	vpxord	%zmm0,%zmm12,%zmm12
+	vpxord	%zmm1,%zmm13,%zmm13
+	vpxord	%zmm2,%zmm14,%zmm14
+	vpxord	%zmm3,%zmm15,%zmm15
+	vprold	$16,%zmm12,%zmm12
+	vprold	$16,%zmm13,%zmm13
+	vprold	$16,%zmm14,%zmm14
+	vprold	$16,%zmm15,%zmm15
+	vpaddd	%zmm12,%zmm8,%zmm8
+	vpaddd	%zmm13,%zmm9,%zmm9
+	vpaddd	%zmm14,%zmm10,%zmm10
+	vpaddd	%zmm15,%zmm11,%zmm11
+	vpxord	%zmm8,%zmm4,%zmm4
+	vpxord	%zmm9,%zmm5,%zmm5
+	vpxord	%zmm10,%zmm6,%zmm6
+	vpxord	%zmm11,%zmm7,%zmm7
+	vprold	$12,%zmm4,%zmm4
+	vprold	$12,%zmm5,%zmm5
+	vprold	$12,%zmm6,%zmm6
+	vprold	$12,%zmm7,%zmm7
+	vpaddd	%zmm4,%zmm0,%zmm0
+	vpaddd	%zmm5,%zmm1,%zmm1
+	vpaddd	%zmm6,%zmm2,%zmm2
+	vpaddd	%zmm7,%zmm3,%zmm3
+	vpxord	%zmm0,%zmm12,%zmm12
+	vpxord	%zmm1,%zmm13,%zmm13
+	vpxord	%zmm2,%zmm14,%zmm14
+	vpxord	%zmm3,%zmm15,%zmm15
+	vprold	$8,%zmm12,%zmm12
+	vprold	$8,%zmm13,%zmm13
+	vprold	$8,%zmm14,%zmm14
+	vprold	$8,%zmm15,%zmm15
+	vpaddd	%zmm12,%zmm8,%zmm8
+	vpaddd	%zmm13,%zmm9,%zmm9
+	vpaddd	%zmm14,%zmm10,%zmm10
+	vpaddd	%zmm15,%zmm11,%zmm11
+	vpxord	%zmm8,%zmm4,%zmm4
+	vpxord	%zmm9,%zmm5,%zmm5
+	vpxord	%zmm10,%zmm6,%zmm6
+	vpxord	%zmm11,%zmm7,%zmm7
+	vprold	$7,%zmm4,%zmm4
+	vprold	$7,%zmm5,%zmm5
+	vprold	$7,%zmm6,%zmm6
+	vprold	$7,%zmm7,%zmm7
+	vpaddd	%zmm5,%zmm0,%zmm0
+	vpaddd	%zmm6,%zmm1,%zmm1
+	vpaddd	%zmm7,%zmm2,%zmm2
+	vpaddd	%zmm4,%zmm3,%zmm3
+	vpxord	%zmm0,%zmm15,%zmm15
+	vpxord	%zmm1,%zmm12,%zmm12
+	vpxord	%zmm2,%zmm13,%zmm13
+	vpxord	%zmm3,%zmm14,%zmm14
+	vprold	$16,%zmm15,%zmm15
+	vprold	$16,%zmm12,%zmm12
+	vprold	$16,%zmm13,%zmm13
+	vprold	$16,%zmm14,%zmm14
+	vpaddd	%zmm15,%zmm10,%zmm10
+	vpaddd	%zmm12,%zmm11,%zmm11
+	vpaddd	%zmm13,%zmm8,%zmm8
+	vpaddd	%zmm14,%zmm9,%zmm9
+	vpxord	%zmm10,%zmm5,%zmm5
+	vpxord	%zmm11,%zmm6,%zmm6
+	vpxord	%zmm8,%zmm7,%zmm7
+	vpxord	%zmm9,%zmm4,%zmm4
+	vprold	$12,%zmm5,%zmm5
+	vprold	$12,%zmm6,%zmm6
+	vprold	$12,%zmm7,%zmm7
+	vprold	$12,%zmm4,%zmm4
+	vpaddd	%zmm5,%zmm0,%zmm0
+	vpaddd	%zmm6,%zmm1,%zmm1
+	vpaddd	%zmm7,%zmm2,%zmm2
+	vpaddd	%zmm4,%zmm3,%zmm3
+	vpxord	%zmm0,%zmm15,%zmm15
+	vpxord	%zmm1,%zmm12,%zmm12
+	vpxord	%zmm2,%zmm13,%zmm13
+	vpxord	%zmm3,%zmm14,%zmm14
+	vprold	$8,%zmm15,%zmm15
+	vprold	$8,%zmm12,%zmm12
+	vprold	$8,%zmm13,%zmm13
+	vprold	$8,%zmm14,%zmm14
+	vpaddd	%zmm15,%zmm10,%zmm10
+	vpaddd	%zmm12,%zmm11,%zmm11
+	vpaddd	%zmm13,%zmm8,%zmm8
+	vpaddd	%zmm14,%zmm9,%zmm9
+	vpxord	%zmm10,%zmm5,%zmm5
+	vpxord	%zmm11,%zmm6,%zmm6
+	vpxord	%zmm8,%zmm7,%zmm7
+	vpxord	%zmm9,%zmm4,%zmm4
+	vprold	$7,%zmm5,%zmm5
+	vprold	$7,%zmm6,%zmm6
+	vprold	$7,%zmm7,%zmm7
+	vprold	$7,%zmm4,%zmm4
+	decl	%eax
+	jnz	.Loop16x
+
+	vpaddd	%zmm16,%zmm0,%zmm0
+	vpaddd	%zmm17,%zmm1,%zmm1
+	vpaddd	%zmm18,%zmm2,%zmm2
+	vpaddd	%zmm19,%zmm3,%zmm3
+
+	vpunpckldq	%zmm1,%zmm0,%zmm18
+	vpunpckldq	%zmm3,%zmm2,%zmm19
+	vpunpckhdq	%zmm1,%zmm0,%zmm0
+	vpunpckhdq	%zmm3,%zmm2,%zmm2
+	vpunpcklqdq	%zmm19,%zmm18,%zmm1
+	vpunpckhqdq	%zmm19,%zmm18,%zmm18
+	vpunpcklqdq	%zmm2,%zmm0,%zmm3
+	vpunpckhqdq	%zmm2,%zmm0,%zmm0
+	vpaddd	%zmm20,%zmm4,%zmm4
+	vpaddd	%zmm21,%zmm5,%zmm5
+	vpaddd	%zmm22,%zmm6,%zmm6
+	vpaddd	%zmm23,%zmm7,%zmm7
+
+	vpunpckldq	%zmm5,%zmm4,%zmm2
+	vpunpckldq	%zmm7,%zmm6,%zmm19
+	vpunpckhdq	%zmm5,%zmm4,%zmm4
+	vpunpckhdq	%zmm7,%zmm6,%zmm6
+	vpunpcklqdq	%zmm19,%zmm2,%zmm5
+	vpunpckhqdq	%zmm19,%zmm2,%zmm2
+	vpunpcklqdq	%zmm6,%zmm4,%zmm7
+	vpunpckhqdq	%zmm6,%zmm4,%zmm4
+	vshufi32x4	$0x44,%zmm5,%zmm1,%zmm19
+	vshufi32x4	$0xee,%zmm5,%zmm1,%zmm5
+	vshufi32x4	$0x44,%zmm2,%zmm18,%zmm1
+	vshufi32x4	$0xee,%zmm2,%zmm18,%zmm2
+	vshufi32x4	$0x44,%zmm7,%zmm3,%zmm18
+	vshufi32x4	$0xee,%zmm7,%zmm3,%zmm7
+	vshufi32x4	$0x44,%zmm4,%zmm0,%zmm3
+	vshufi32x4	$0xee,%zmm4,%zmm0,%zmm4
+	vpaddd	%zmm24,%zmm8,%zmm8
+	vpaddd	%zmm25,%zmm9,%zmm9
+	vpaddd	%zmm26,%zmm10,%zmm10
+	vpaddd	%zmm27,%zmm11,%zmm11
+
+	vpunpckldq	%zmm9,%zmm8,%zmm6
+	vpunpckldq	%zmm11,%zmm10,%zmm0
+	vpunpckhdq	%zmm9,%zmm8,%zmm8
+	vpunpckhdq	%zmm11,%zmm10,%zmm10
+	vpunpcklqdq	%zmm0,%zmm6,%zmm9
+	vpunpckhqdq	%zmm0,%zmm6,%zmm6
+	vpunpcklqdq	%zmm10,%zmm8,%zmm11
+	vpunpckhqdq	%zmm10,%zmm8,%zmm8
+	vpaddd	%zmm28,%zmm12,%zmm12
+	vpaddd	%zmm29,%zmm13,%zmm13
+	vpaddd	%zmm30,%zmm14,%zmm14
+	vpaddd	%zmm31,%zmm15,%zmm15
+
+	vpunpckldq	%zmm13,%zmm12,%zmm10
+	vpunpckldq	%zmm15,%zmm14,%zmm0
+	vpunpckhdq	%zmm13,%zmm12,%zmm12
+	vpunpckhdq	%zmm15,%zmm14,%zmm14
+	vpunpcklqdq	%zmm0,%zmm10,%zmm13
+	vpunpckhqdq	%zmm0,%zmm10,%zmm10
+	vpunpcklqdq	%zmm14,%zmm12,%zmm15
+	vpunpckhqdq	%zmm14,%zmm12,%zmm12
+	vshufi32x4	$0x44,%zmm13,%zmm9,%zmm0
+	vshufi32x4	$0xee,%zmm13,%zmm9,%zmm13
+	vshufi32x4	$0x44,%zmm10,%zmm6,%zmm9
+	vshufi32x4	$0xee,%zmm10,%zmm6,%zmm10
+	vshufi32x4	$0x44,%zmm15,%zmm11,%zmm6
+	vshufi32x4	$0xee,%zmm15,%zmm11,%zmm15
+	vshufi32x4	$0x44,%zmm12,%zmm8,%zmm11
+	vshufi32x4	$0xee,%zmm12,%zmm8,%zmm12
+	vshufi32x4	$0x88,%zmm0,%zmm19,%zmm16
+	vshufi32x4	$0xdd,%zmm0,%zmm19,%zmm19
+	vshufi32x4	$0x88,%zmm13,%zmm5,%zmm0
+	vshufi32x4	$0xdd,%zmm13,%zmm5,%zmm13
+	vshufi32x4	$0x88,%zmm9,%zmm1,%zmm17
+	vshufi32x4	$0xdd,%zmm9,%zmm1,%zmm1
+	vshufi32x4	$0x88,%zmm10,%zmm2,%zmm9
+	vshufi32x4	$0xdd,%zmm10,%zmm2,%zmm10
+	vshufi32x4	$0x88,%zmm6,%zmm18,%zmm14
+	vshufi32x4	$0xdd,%zmm6,%zmm18,%zmm18
+	vshufi32x4	$0x88,%zmm15,%zmm7,%zmm6
+	vshufi32x4	$0xdd,%zmm15,%zmm7,%zmm15
+	vshufi32x4	$0x88,%zmm11,%zmm3,%zmm8
+	vshufi32x4	$0xdd,%zmm11,%zmm3,%zmm3
+	vshufi32x4	$0x88,%zmm12,%zmm4,%zmm11
+	vshufi32x4	$0xdd,%zmm12,%zmm4,%zmm12
+	cmpq	$1024,%rdx
+	jb	.Ltail16x
+
+	vpxord	0(%rsi),%zmm16,%zmm16
+	vpxord	64(%rsi),%zmm17,%zmm17
+	vpxord	128(%rsi),%zmm14,%zmm14
+	vpxord	192(%rsi),%zmm8,%zmm8
+	vmovdqu32	%zmm16,0(%rdi)
+	vmovdqu32	%zmm17,64(%rdi)
+	vmovdqu32	%zmm14,128(%rdi)
+	vmovdqu32	%zmm8,192(%rdi)
+
+	vpxord	256(%rsi),%zmm19,%zmm19
+	vpxord	320(%rsi),%zmm1,%zmm1
+	vpxord	384(%rsi),%zmm18,%zmm18
+	vpxord	448(%rsi),%zmm3,%zmm3
+	vmovdqu32	%zmm19,256(%rdi)
+	vmovdqu32	%zmm1,320(%rdi)
+	vmovdqu32	%zmm18,384(%rdi)
+	vmovdqu32	%zmm3,448(%rdi)
+
+	vpxord	512(%rsi),%zmm0,%zmm0
+	vpxord	576(%rsi),%zmm9,%zmm9
+	vpxord	640(%rsi),%zmm6,%zmm6
+	vpxord	704(%rsi),%zmm11,%zmm11
+	vmovdqu32	%zmm0,512(%rdi)
+	vmovdqu32	%zmm9,576(%rdi)
+	vmovdqu32	%zmm6,640(%rdi)
+	vmovdqu32	%zmm11,704(%rdi)
+
+	vpxord	768(%rsi),%zmm13,%zmm13
+	vpxord	832(%rsi),%zmm10,%zmm10
+	vpxord	896(%rsi),%zmm15,%zmm15
+	vpxord	960(%rsi),%zmm12,%zmm12
+	leaq	1024(%rsi),%rsi
+	vmovdqu32	%zmm13,768(%rdi)
+	vmovdqu32	%zmm10,832(%rdi)
+	vmovdqu32	%zmm15,896(%rdi)
+	vmovdqu32	%zmm12,960(%rdi)
+	leaq	1024(%rdi),%rdi
+
+	subq	$1024,%rdx
+	jnz	.Loop_outer16x
+
+	jmp	.Ldone16x
+
+.align	32
+.Ltail16x:
+	xorq	%r9,%r9
+	subq	%rsi,%rdi
+	cmpq	$64,%rdx
+	jb	.Less_than_64_16x
+	vpxord	(%rsi),%zmm16,%zmm16
+	vmovdqu32	%zmm16,(%rdi,%rsi,1)
+	je	.Ldone16x
+	vmovdqa32	%zmm17,%zmm16
+	leaq	64(%rsi),%rsi
+
+	cmpq	$128,%rdx
+	jb	.Less_than_64_16x
+	vpxord	(%rsi),%zmm17,%zmm17
+	vmovdqu32	%zmm17,(%rdi,%rsi,1)
+	je	.Ldone16x
+	vmovdqa32	%zmm14,%zmm16
+	leaq	64(%rsi),%rsi
+
+	cmpq	$192,%rdx
+	jb	.Less_than_64_16x
+	vpxord	(%rsi),%zmm14,%zmm14
+	vmovdqu32	%zmm14,(%rdi,%rsi,1)
+	je	.Ldone16x
+	vmovdqa32	%zmm8,%zmm16
+	leaq	64(%rsi),%rsi
+
+	cmpq	$256,%rdx
+	jb	.Less_than_64_16x
+	vpxord	(%rsi),%zmm8,%zmm8
+	vmovdqu32	%zmm8,(%rdi,%rsi,1)
+	je	.Ldone16x
+	vmovdqa32	%zmm19,%zmm16
+	leaq	64(%rsi),%rsi
+
+	cmpq	$320,%rdx
+	jb	.Less_than_64_16x
+	vpxord	(%rsi),%zmm19,%zmm19
+	vmovdqu32	%zmm19,(%rdi,%rsi,1)
+	je	.Ldone16x
+	vmovdqa32	%zmm1,%zmm16
+	leaq	64(%rsi),%rsi
+
+	cmpq	$384,%rdx
+	jb	.Less_than_64_16x
+	vpxord	(%rsi),%zmm1,%zmm1
+	vmovdqu32	%zmm1,(%rdi,%rsi,1)
+	je	.Ldone16x
+	vmovdqa32	%zmm18,%zmm16
+	leaq	64(%rsi),%rsi
+
+	cmpq	$448,%rdx
+	jb	.Less_than_64_16x
+	vpxord	(%rsi),%zmm18,%zmm18
+	vmovdqu32	%zmm18,(%rdi,%rsi,1)
+	je	.Ldone16x
+	vmovdqa32	%zmm3,%zmm16
+	leaq	64(%rsi),%rsi
+
+	cmpq	$512,%rdx
+	jb	.Less_than_64_16x
+	vpxord	(%rsi),%zmm3,%zmm3
+	vmovdqu32	%zmm3,(%rdi,%rsi,1)
+	je	.Ldone16x
+	vmovdqa32	%zmm0,%zmm16
+	leaq	64(%rsi),%rsi
+
+	cmpq	$576,%rdx
+	jb	.Less_than_64_16x
+	vpxord	(%rsi),%zmm0,%zmm0
+	vmovdqu32	%zmm0,(%rdi,%rsi,1)
+	je	.Ldone16x
+	vmovdqa32	%zmm9,%zmm16
+	leaq	64(%rsi),%rsi
+
+	cmpq	$640,%rdx
+	jb	.Less_than_64_16x
+	vpxord	(%rsi),%zmm9,%zmm9
+	vmovdqu32	%zmm9,(%rdi,%rsi,1)
+	je	.Ldone16x
+	vmovdqa32	%zmm6,%zmm16
+	leaq	64(%rsi),%rsi
+
+	cmpq	$704,%rdx
+	jb	.Less_than_64_16x
+	vpxord	(%rsi),%zmm6,%zmm6
+	vmovdqu32	%zmm6,(%rdi,%rsi,1)
+	je	.Ldone16x
+	vmovdqa32	%zmm11,%zmm16
+	leaq	64(%rsi),%rsi
+
+	cmpq	$768,%rdx
+	jb	.Less_than_64_16x
+	vpxord	(%rsi),%zmm11,%zmm11
+	vmovdqu32	%zmm11,(%rdi,%rsi,1)
+	je	.Ldone16x
+	vmovdqa32	%zmm13,%zmm16
+	leaq	64(%rsi),%rsi
+
+	cmpq	$832,%rdx
+	jb	.Less_than_64_16x
+	vpxord	(%rsi),%zmm13,%zmm13
+	vmovdqu32	%zmm13,(%rdi,%rsi,1)
+	je	.Ldone16x
+	vmovdqa32	%zmm10,%zmm16
+	leaq	64(%rsi),%rsi
+
+	cmpq	$896,%rdx
+	jb	.Less_than_64_16x
+	vpxord	(%rsi),%zmm10,%zmm10
+	vmovdqu32	%zmm10,(%rdi,%rsi,1)
+	je	.Ldone16x
+	vmovdqa32	%zmm15,%zmm16
+	leaq	64(%rsi),%rsi
+
+	cmpq	$960,%rdx
+	jb	.Less_than_64_16x
+	vpxord	(%rsi),%zmm15,%zmm15
+	vmovdqu32	%zmm15,(%rdi,%rsi,1)
+	je	.Ldone16x
+	vmovdqa32	%zmm12,%zmm16
+	leaq	64(%rsi),%rsi
+
+.Less_than_64_16x:
+	vmovdqa32	%zmm16,0(%rsp)
+	leaq	(%rdi,%rsi,1),%rdi
+	andq	$63,%rdx
+
+.Loop_tail16x:
+	movzbl	(%rsi,%r9,1),%eax
+	movzbl	(%rsp,%r9,1),%ecx
+	leaq	1(%r9),%r9
+	xorl	%ecx,%eax
+	movb	%al,-1(%rdi,%r9,1)
+	decq	%rdx
+	jnz	.Loop_tail16x
+
+	vpxord	%zmm16,%zmm16,%zmm16
+	vmovdqa32	%zmm16,0(%rsp)
+
+.Ldone16x:
+	vzeroall
+	leaq	-8(%r10),%rsp
+
+.L16x_epilogue:
+	ret
+ENDPROC(chacha20_avx512)
+
+.align	32
+ENTRY(chacha20_avx512vl)
+	cmpq	$0,%rdx
+	je	.Lavx512vl_epilogue
+
+	leaq	8(%rsp),%r10
+
+	cmpq	$128,%rdx
+	ja	.Lchacha20_8xvl
+
+	subq	$64+8,%rsp
+	andq	$-64,%rsp
+	vbroadcasti128	.Lsigma(%rip),%ymm0
+	vbroadcasti128	(%rcx),%ymm1
+	vbroadcasti128	16(%rcx),%ymm2
+	vbroadcasti128	(%r8),%ymm3
+
+	vmovdqa32	%ymm0,%ymm16
+	vmovdqa32	%ymm1,%ymm17
+	vmovdqa32	%ymm2,%ymm18
+	vpaddd	.Lzeroz(%rip),%ymm3,%ymm3
+	vmovdqa32	.Ltwoy(%rip),%ymm20
+	movq	$10,%r8
+	vmovdqa32	%ymm3,%ymm19
+	jmp	.Loop_avx512vl
+
+.align	16
+.Loop_outer_avx512vl:
+	vmovdqa32	%ymm18,%ymm2
+	vpaddd	%ymm20,%ymm19,%ymm3
+	movq	$10,%r8
+	vmovdqa32	%ymm3,%ymm19
+	jmp	.Loop_avx512vl
+
+.align	32
+.Loop_avx512vl:
+	vpaddd	%ymm1,%ymm0,%ymm0
+	vpxor	%ymm0,%ymm3,%ymm3
+	vprold	$16,%ymm3,%ymm3
+	vpaddd	%ymm3,%ymm2,%ymm2
+	vpxor	%ymm2,%ymm1,%ymm1
+	vprold	$12,%ymm1,%ymm1
+	vpaddd	%ymm1,%ymm0,%ymm0
+	vpxor	%ymm0,%ymm3,%ymm3
+	vprold	$8,%ymm3,%ymm3
+	vpaddd	%ymm3,%ymm2,%ymm2
+	vpxor	%ymm2,%ymm1,%ymm1
+	vprold	$7,%ymm1,%ymm1
+	vpshufd	$78,%ymm2,%ymm2
+	vpshufd	$57,%ymm1,%ymm1
+	vpshufd	$147,%ymm3,%ymm3
+	vpaddd	%ymm1,%ymm0,%ymm0
+	vpxor	%ymm0,%ymm3,%ymm3
+	vprold	$16,%ymm3,%ymm3
+	vpaddd	%ymm3,%ymm2,%ymm2
+	vpxor	%ymm2,%ymm1,%ymm1
+	vprold	$12,%ymm1,%ymm1
+	vpaddd	%ymm1,%ymm0,%ymm0
+	vpxor	%ymm0,%ymm3,%ymm3
+	vprold	$8,%ymm3,%ymm3
+	vpaddd	%ymm3,%ymm2,%ymm2
+	vpxor	%ymm2,%ymm1,%ymm1
+	vprold	$7,%ymm1,%ymm1
+	vpshufd	$78,%ymm2,%ymm2
+	vpshufd	$147,%ymm1,%ymm1
+	vpshufd	$57,%ymm3,%ymm3
+	decq	%r8
+	jnz	.Loop_avx512vl
+	vpaddd	%ymm16,%ymm0,%ymm0
+	vpaddd	%ymm17,%ymm1,%ymm1
+	vpaddd	%ymm18,%ymm2,%ymm2
+	vpaddd	%ymm19,%ymm3,%ymm3
+
+	subq	$64,%rdx
+	jb	.Ltail64_avx512vl
+
+	vpxor	0(%rsi),%xmm0,%xmm4
+	vpxor	16(%rsi),%xmm1,%xmm5
+	vpxor	32(%rsi),%xmm2,%xmm6
+	vpxor	48(%rsi),%xmm3,%xmm7
+	leaq	64(%rsi),%rsi
+
+	vmovdqu	%xmm4,0(%rdi)
+	vmovdqu	%xmm5,16(%rdi)
+	vmovdqu	%xmm6,32(%rdi)
+	vmovdqu	%xmm7,48(%rdi)
+	leaq	64(%rdi),%rdi
+
+	jz	.Ldone_avx512vl
+
+	vextracti128	$1,%ymm0,%xmm4
+	vextracti128	$1,%ymm1,%xmm5
+	vextracti128	$1,%ymm2,%xmm6
+	vextracti128	$1,%ymm3,%xmm7
+
+	subq	$64,%rdx
+	jb	.Ltail_avx512vl
+
+	vpxor	0(%rsi),%xmm4,%xmm4
+	vpxor	16(%rsi),%xmm5,%xmm5
+	vpxor	32(%rsi),%xmm6,%xmm6
+	vpxor	48(%rsi),%xmm7,%xmm7
+	leaq	64(%rsi),%rsi
+
+	vmovdqu	%xmm4,0(%rdi)
+	vmovdqu	%xmm5,16(%rdi)
+	vmovdqu	%xmm6,32(%rdi)
+	vmovdqu	%xmm7,48(%rdi)
+	leaq	64(%rdi),%rdi
+
+	vmovdqa32	%ymm16,%ymm0
+	vmovdqa32	%ymm17,%ymm1
+	jnz	.Loop_outer_avx512vl
+
+	jmp	.Ldone_avx512vl
+
+.align	16
+.Ltail64_avx512vl:
+	vmovdqa	%xmm0,0(%rsp)
+	vmovdqa	%xmm1,16(%rsp)
+	vmovdqa	%xmm2,32(%rsp)
+	vmovdqa	%xmm3,48(%rsp)
+	addq	$64,%rdx
+	jmp	.Loop_tail_avx512vl
+
+.align	16
+.Ltail_avx512vl:
+	vmovdqa	%xmm4,0(%rsp)
+	vmovdqa	%xmm5,16(%rsp)
+	vmovdqa	%xmm6,32(%rsp)
+	vmovdqa	%xmm7,48(%rsp)
+	addq	$64,%rdx
+
+.Loop_tail_avx512vl:
+	movzbl	(%rsi,%r8,1),%eax
+	movzbl	(%rsp,%r8,1),%ecx
+	leaq	1(%r8),%r8
+	xorl	%ecx,%eax
+	movb	%al,-1(%rdi,%r8,1)
+	decq	%rdx
+	jnz	.Loop_tail_avx512vl
+
+	vmovdqa32	%ymm16,0(%rsp)
+	vmovdqa32	%ymm16,32(%rsp)
+
+.Ldone_avx512vl:
+	vzeroall
+	leaq	-8(%r10),%rsp
+.Lavx512vl_epilogue:
+	ret
+
+.align	32
+.Lchacha20_8xvl:
+	leaq	8(%rsp),%r10
+	subq	$64+8,%rsp
+	andq	$-64,%rsp
+	vzeroupper
+
+	leaq	.Lsigma(%rip),%r9
+	vbroadcasti128	(%r9),%ymm3
+	vbroadcasti128	(%rcx),%ymm7
+	vbroadcasti128	16(%rcx),%ymm11
+	vbroadcasti128	(%r8),%ymm15
+
+	vpshufd	$0x00,%ymm3,%ymm0
+	vpshufd	$0x55,%ymm3,%ymm1
+	vpshufd	$0xaa,%ymm3,%ymm2
+	vpshufd	$0xff,%ymm3,%ymm3
+	vmovdqa64	%ymm0,%ymm16
+	vmovdqa64	%ymm1,%ymm17
+	vmovdqa64	%ymm2,%ymm18
+	vmovdqa64	%ymm3,%ymm19
+
+	vpshufd	$0x00,%ymm7,%ymm4
+	vpshufd	$0x55,%ymm7,%ymm5
+	vpshufd	$0xaa,%ymm7,%ymm6
+	vpshufd	$0xff,%ymm7,%ymm7
+	vmovdqa64	%ymm4,%ymm20
+	vmovdqa64	%ymm5,%ymm21
+	vmovdqa64	%ymm6,%ymm22
+	vmovdqa64	%ymm7,%ymm23
+
+	vpshufd	$0x00,%ymm11,%ymm8
+	vpshufd	$0x55,%ymm11,%ymm9
+	vpshufd	$0xaa,%ymm11,%ymm10
+	vpshufd	$0xff,%ymm11,%ymm11
+	vmovdqa64	%ymm8,%ymm24
+	vmovdqa64	%ymm9,%ymm25
+	vmovdqa64	%ymm10,%ymm26
+	vmovdqa64	%ymm11,%ymm27
+
+	vpshufd	$0x00,%ymm15,%ymm12
+	vpshufd	$0x55,%ymm15,%ymm13
+	vpshufd	$0xaa,%ymm15,%ymm14
+	vpshufd	$0xff,%ymm15,%ymm15
+	vpaddd	.Lincy(%rip),%ymm12,%ymm12
+	vmovdqa64	%ymm12,%ymm28
+	vmovdqa64	%ymm13,%ymm29
+	vmovdqa64	%ymm14,%ymm30
+	vmovdqa64	%ymm15,%ymm31
+
+	movl	$10,%eax
+	jmp	.Loop8xvl
+
+.align	32
+.Loop_outer8xvl:
+
+
+	vpbroadcastd	8(%r9),%ymm2
+	vpbroadcastd	12(%r9),%ymm3
+	vpaddd	.Leight(%rip),%ymm28,%ymm28
+	vmovdqa64	%ymm20,%ymm4
+	vmovdqa64	%ymm21,%ymm5
+	vmovdqa64	%ymm22,%ymm6
+	vmovdqa64	%ymm23,%ymm7
+	vmovdqa64	%ymm24,%ymm8
+	vmovdqa64	%ymm25,%ymm9
+	vmovdqa64	%ymm26,%ymm10
+	vmovdqa64	%ymm27,%ymm11
+	vmovdqa64	%ymm28,%ymm12
+	vmovdqa64	%ymm29,%ymm13
+	vmovdqa64	%ymm30,%ymm14
+	vmovdqa64	%ymm31,%ymm15
+
+	vmovdqa64	%ymm0,%ymm16
+	vmovdqa64	%ymm1,%ymm17
+	vmovdqa64	%ymm2,%ymm18
+	vmovdqa64	%ymm3,%ymm19
+
+	movl	$10,%eax
+	jmp	.Loop8xvl
+
+.align	32
+.Loop8xvl:
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpaddd	%ymm6,%ymm2,%ymm2
+	vpaddd	%ymm7,%ymm3,%ymm3
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpxor	%ymm2,%ymm14,%ymm14
+	vpxor	%ymm3,%ymm15,%ymm15
+	vprold	$16,%ymm12,%ymm12
+	vprold	$16,%ymm13,%ymm13
+	vprold	$16,%ymm14,%ymm14
+	vprold	$16,%ymm15,%ymm15
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpaddd	%ymm14,%ymm10,%ymm10
+	vpaddd	%ymm15,%ymm11,%ymm11
+	vpxor	%ymm8,%ymm4,%ymm4
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpxor	%ymm10,%ymm6,%ymm6
+	vpxor	%ymm11,%ymm7,%ymm7
+	vprold	$12,%ymm4,%ymm4
+	vprold	$12,%ymm5,%ymm5
+	vprold	$12,%ymm6,%ymm6
+	vprold	$12,%ymm7,%ymm7
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpaddd	%ymm6,%ymm2,%ymm2
+	vpaddd	%ymm7,%ymm3,%ymm3
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpxor	%ymm2,%ymm14,%ymm14
+	vpxor	%ymm3,%ymm15,%ymm15
+	vprold	$8,%ymm12,%ymm12
+	vprold	$8,%ymm13,%ymm13
+	vprold	$8,%ymm14,%ymm14
+	vprold	$8,%ymm15,%ymm15
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpaddd	%ymm14,%ymm10,%ymm10
+	vpaddd	%ymm15,%ymm11,%ymm11
+	vpxor	%ymm8,%ymm4,%ymm4
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpxor	%ymm10,%ymm6,%ymm6
+	vpxor	%ymm11,%ymm7,%ymm7
+	vprold	$7,%ymm4,%ymm4
+	vprold	$7,%ymm5,%ymm5
+	vprold	$7,%ymm6,%ymm6
+	vprold	$7,%ymm7,%ymm7
+	vpaddd	%ymm5,%ymm0,%ymm0
+	vpaddd	%ymm6,%ymm1,%ymm1
+	vpaddd	%ymm7,%ymm2,%ymm2
+	vpaddd	%ymm4,%ymm3,%ymm3
+	vpxor	%ymm0,%ymm15,%ymm15
+	vpxor	%ymm1,%ymm12,%ymm12
+	vpxor	%ymm2,%ymm13,%ymm13
+	vpxor	%ymm3,%ymm14,%ymm14
+	vprold	$16,%ymm15,%ymm15
+	vprold	$16,%ymm12,%ymm12
+	vprold	$16,%ymm13,%ymm13
+	vprold	$16,%ymm14,%ymm14
+	vpaddd	%ymm15,%ymm10,%ymm10
+	vpaddd	%ymm12,%ymm11,%ymm11
+	vpaddd	%ymm13,%ymm8,%ymm8
+	vpaddd	%ymm14,%ymm9,%ymm9
+	vpxor	%ymm10,%ymm5,%ymm5
+	vpxor	%ymm11,%ymm6,%ymm6
+	vpxor	%ymm8,%ymm7,%ymm7
+	vpxor	%ymm9,%ymm4,%ymm4
+	vprold	$12,%ymm5,%ymm5
+	vprold	$12,%ymm6,%ymm6
+	vprold	$12,%ymm7,%ymm7
+	vprold	$12,%ymm4,%ymm4
+	vpaddd	%ymm5,%ymm0,%ymm0
+	vpaddd	%ymm6,%ymm1,%ymm1
+	vpaddd	%ymm7,%ymm2,%ymm2
+	vpaddd	%ymm4,%ymm3,%ymm3
+	vpxor	%ymm0,%ymm15,%ymm15
+	vpxor	%ymm1,%ymm12,%ymm12
+	vpxor	%ymm2,%ymm13,%ymm13
+	vpxor	%ymm3,%ymm14,%ymm14
+	vprold	$8,%ymm15,%ymm15
+	vprold	$8,%ymm12,%ymm12
+	vprold	$8,%ymm13,%ymm13
+	vprold	$8,%ymm14,%ymm14
+	vpaddd	%ymm15,%ymm10,%ymm10
+	vpaddd	%ymm12,%ymm11,%ymm11
+	vpaddd	%ymm13,%ymm8,%ymm8
+	vpaddd	%ymm14,%ymm9,%ymm9
+	vpxor	%ymm10,%ymm5,%ymm5
+	vpxor	%ymm11,%ymm6,%ymm6
+	vpxor	%ymm8,%ymm7,%ymm7
+	vpxor	%ymm9,%ymm4,%ymm4
+	vprold	$7,%ymm5,%ymm5
+	vprold	$7,%ymm6,%ymm6
+	vprold	$7,%ymm7,%ymm7
+	vprold	$7,%ymm4,%ymm4
+	decl	%eax
+	jnz	.Loop8xvl
+
+	vpaddd	%ymm16,%ymm0,%ymm0
+	vpaddd	%ymm17,%ymm1,%ymm1
+	vpaddd	%ymm18,%ymm2,%ymm2
+	vpaddd	%ymm19,%ymm3,%ymm3
+
+	vpunpckldq	%ymm1,%ymm0,%ymm18
+	vpunpckldq	%ymm3,%ymm2,%ymm19
+	vpunpckhdq	%ymm1,%ymm0,%ymm0
+	vpunpckhdq	%ymm3,%ymm2,%ymm2
+	vpunpcklqdq	%ymm19,%ymm18,%ymm1
+	vpunpckhqdq	%ymm19,%ymm18,%ymm18
+	vpunpcklqdq	%ymm2,%ymm0,%ymm3
+	vpunpckhqdq	%ymm2,%ymm0,%ymm0
+	vpaddd	%ymm20,%ymm4,%ymm4
+	vpaddd	%ymm21,%ymm5,%ymm5
+	vpaddd	%ymm22,%ymm6,%ymm6
+	vpaddd	%ymm23,%ymm7,%ymm7
+
+	vpunpckldq	%ymm5,%ymm4,%ymm2
+	vpunpckldq	%ymm7,%ymm6,%ymm19
+	vpunpckhdq	%ymm5,%ymm4,%ymm4
+	vpunpckhdq	%ymm7,%ymm6,%ymm6
+	vpunpcklqdq	%ymm19,%ymm2,%ymm5
+	vpunpckhqdq	%ymm19,%ymm2,%ymm2
+	vpunpcklqdq	%ymm6,%ymm4,%ymm7
+	vpunpckhqdq	%ymm6,%ymm4,%ymm4
+	vshufi32x4	$0,%ymm5,%ymm1,%ymm19
+	vshufi32x4	$3,%ymm5,%ymm1,%ymm5
+	vshufi32x4	$0,%ymm2,%ymm18,%ymm1
+	vshufi32x4	$3,%ymm2,%ymm18,%ymm2
+	vshufi32x4	$0,%ymm7,%ymm3,%ymm18
+	vshufi32x4	$3,%ymm7,%ymm3,%ymm7
+	vshufi32x4	$0,%ymm4,%ymm0,%ymm3
+	vshufi32x4	$3,%ymm4,%ymm0,%ymm4
+	vpaddd	%ymm24,%ymm8,%ymm8
+	vpaddd	%ymm25,%ymm9,%ymm9
+	vpaddd	%ymm26,%ymm10,%ymm10
+	vpaddd	%ymm27,%ymm11,%ymm11
+
+	vpunpckldq	%ymm9,%ymm8,%ymm6
+	vpunpckldq	%ymm11,%ymm10,%ymm0
+	vpunpckhdq	%ymm9,%ymm8,%ymm8
+	vpunpckhdq	%ymm11,%ymm10,%ymm10
+	vpunpcklqdq	%ymm0,%ymm6,%ymm9
+	vpunpckhqdq	%ymm0,%ymm6,%ymm6
+	vpunpcklqdq	%ymm10,%ymm8,%ymm11
+	vpunpckhqdq	%ymm10,%ymm8,%ymm8
+	vpaddd	%ymm28,%ymm12,%ymm12
+	vpaddd	%ymm29,%ymm13,%ymm13
+	vpaddd	%ymm30,%ymm14,%ymm14
+	vpaddd	%ymm31,%ymm15,%ymm15
+
+	vpunpckldq	%ymm13,%ymm12,%ymm10
+	vpunpckldq	%ymm15,%ymm14,%ymm0
+	vpunpckhdq	%ymm13,%ymm12,%ymm12
+	vpunpckhdq	%ymm15,%ymm14,%ymm14
+	vpunpcklqdq	%ymm0,%ymm10,%ymm13
+	vpunpckhqdq	%ymm0,%ymm10,%ymm10
+	vpunpcklqdq	%ymm14,%ymm12,%ymm15
+	vpunpckhqdq	%ymm14,%ymm12,%ymm12
+	vperm2i128	$0x20,%ymm13,%ymm9,%ymm0
+	vperm2i128	$0x31,%ymm13,%ymm9,%ymm13
+	vperm2i128	$0x20,%ymm10,%ymm6,%ymm9
+	vperm2i128	$0x31,%ymm10,%ymm6,%ymm10
+	vperm2i128	$0x20,%ymm15,%ymm11,%ymm6
+	vperm2i128	$0x31,%ymm15,%ymm11,%ymm15
+	vperm2i128	$0x20,%ymm12,%ymm8,%ymm11
+	vperm2i128	$0x31,%ymm12,%ymm8,%ymm12
+	cmpq	$512,%rdx
+	jb	.Ltail8xvl
+
+	movl	$0x80,%eax
+	vpxord	0(%rsi),%ymm19,%ymm19
+	vpxor	32(%rsi),%ymm0,%ymm0
+	vpxor	64(%rsi),%ymm5,%ymm5
+	vpxor	96(%rsi),%ymm13,%ymm13
+	leaq	(%rsi,%rax,1),%rsi
+	vmovdqu32	%ymm19,0(%rdi)
+	vmovdqu	%ymm0,32(%rdi)
+	vmovdqu	%ymm5,64(%rdi)
+	vmovdqu	%ymm13,96(%rdi)
+	leaq	(%rdi,%rax,1),%rdi
+
+	vpxor	0(%rsi),%ymm1,%ymm1
+	vpxor	32(%rsi),%ymm9,%ymm9
+	vpxor	64(%rsi),%ymm2,%ymm2
+	vpxor	96(%rsi),%ymm10,%ymm10
+	leaq	(%rsi,%rax,1),%rsi
+	vmovdqu	%ymm1,0(%rdi)
+	vmovdqu	%ymm9,32(%rdi)
+	vmovdqu	%ymm2,64(%rdi)
+	vmovdqu	%ymm10,96(%rdi)
+	leaq	(%rdi,%rax,1),%rdi
+
+	vpxord	0(%rsi),%ymm18,%ymm18
+	vpxor	32(%rsi),%ymm6,%ymm6
+	vpxor	64(%rsi),%ymm7,%ymm7
+	vpxor	96(%rsi),%ymm15,%ymm15
+	leaq	(%rsi,%rax,1),%rsi
+	vmovdqu32	%ymm18,0(%rdi)
+	vmovdqu	%ymm6,32(%rdi)
+	vmovdqu	%ymm7,64(%rdi)
+	vmovdqu	%ymm15,96(%rdi)
+	leaq	(%rdi,%rax,1),%rdi
+
+	vpxor	0(%rsi),%ymm3,%ymm3
+	vpxor	32(%rsi),%ymm11,%ymm11
+	vpxor	64(%rsi),%ymm4,%ymm4
+	vpxor	96(%rsi),%ymm12,%ymm12
+	leaq	(%rsi,%rax,1),%rsi
+	vmovdqu	%ymm3,0(%rdi)
+	vmovdqu	%ymm11,32(%rdi)
+	vmovdqu	%ymm4,64(%rdi)
+	vmovdqu	%ymm12,96(%rdi)
+	leaq	(%rdi,%rax,1),%rdi
+
+	vpbroadcastd	0(%r9),%ymm0
+	vpbroadcastd	4(%r9),%ymm1
+
+	subq	$512,%rdx
+	jnz	.Loop_outer8xvl
+
+	jmp	.Ldone8xvl
+
+.align	32
+.Ltail8xvl:
+	vmovdqa64	%ymm19,%ymm8
+	xorq	%r9,%r9
+	subq	%rsi,%rdi
+	cmpq	$64,%rdx
+	jb	.Less_than_64_8xvl
+	vpxor	0(%rsi),%ymm8,%ymm8
+	vpxor	32(%rsi),%ymm0,%ymm0
+	vmovdqu	%ymm8,0(%rdi,%rsi,1)
+	vmovdqu	%ymm0,32(%rdi,%rsi,1)
+	je	.Ldone8xvl
+	vmovdqa	%ymm5,%ymm8
+	vmovdqa	%ymm13,%ymm0
+	leaq	64(%rsi),%rsi
+
+	cmpq	$128,%rdx
+	jb	.Less_than_64_8xvl
+	vpxor	0(%rsi),%ymm5,%ymm5
+	vpxor	32(%rsi),%ymm13,%ymm13
+	vmovdqu	%ymm5,0(%rdi,%rsi,1)
+	vmovdqu	%ymm13,32(%rdi,%rsi,1)
+	je	.Ldone8xvl
+	vmovdqa	%ymm1,%ymm8
+	vmovdqa	%ymm9,%ymm0
+	leaq	64(%rsi),%rsi
+
+	cmpq	$192,%rdx
+	jb	.Less_than_64_8xvl
+	vpxor	0(%rsi),%ymm1,%ymm1
+	vpxor	32(%rsi),%ymm9,%ymm9
+	vmovdqu	%ymm1,0(%rdi,%rsi,1)
+	vmovdqu	%ymm9,32(%rdi,%rsi,1)
+	je	.Ldone8xvl
+	vmovdqa	%ymm2,%ymm8
+	vmovdqa	%ymm10,%ymm0
+	leaq	64(%rsi),%rsi
+
+	cmpq	$256,%rdx
+	jb	.Less_than_64_8xvl
+	vpxor	0(%rsi),%ymm2,%ymm2
+	vpxor	32(%rsi),%ymm10,%ymm10
+	vmovdqu	%ymm2,0(%rdi,%rsi,1)
+	vmovdqu	%ymm10,32(%rdi,%rsi,1)
+	je	.Ldone8xvl
+	vmovdqa32	%ymm18,%ymm8
+	vmovdqa	%ymm6,%ymm0
+	leaq	64(%rsi),%rsi
+
+	cmpq	$320,%rdx
+	jb	.Less_than_64_8xvl
+	vpxord	0(%rsi),%ymm18,%ymm18
+	vpxor	32(%rsi),%ymm6,%ymm6
+	vmovdqu32	%ymm18,0(%rdi,%rsi,1)
+	vmovdqu	%ymm6,32(%rdi,%rsi,1)
+	je	.Ldone8xvl
+	vmovdqa	%ymm7,%ymm8
+	vmovdqa	%ymm15,%ymm0
+	leaq	64(%rsi),%rsi
+
+	cmpq	$384,%rdx
+	jb	.Less_than_64_8xvl
+	vpxor	0(%rsi),%ymm7,%ymm7
+	vpxor	32(%rsi),%ymm15,%ymm15
+	vmovdqu	%ymm7,0(%rdi,%rsi,1)
+	vmovdqu	%ymm15,32(%rdi,%rsi,1)
+	je	.Ldone8xvl
+	vmovdqa	%ymm3,%ymm8
+	vmovdqa	%ymm11,%ymm0
+	leaq	64(%rsi),%rsi
+
+	cmpq	$448,%rdx
+	jb	.Less_than_64_8xvl
+	vpxor	0(%rsi),%ymm3,%ymm3
+	vpxor	32(%rsi),%ymm11,%ymm11
+	vmovdqu	%ymm3,0(%rdi,%rsi,1)
+	vmovdqu	%ymm11,32(%rdi,%rsi,1)
+	je	.Ldone8xvl
+	vmovdqa	%ymm4,%ymm8
+	vmovdqa	%ymm12,%ymm0
+	leaq	64(%rsi),%rsi
+
+.Less_than_64_8xvl:
+	vmovdqa	%ymm8,0(%rsp)
+	vmovdqa	%ymm0,32(%rsp)
+	leaq	(%rdi,%rsi,1),%rdi
+	andq	$63,%rdx
+
+.Loop_tail8xvl:
+	movzbl	(%rsi,%r9,1),%eax
+	movzbl	(%rsp,%r9,1),%ecx
+	leaq	1(%r9),%r9
+	xorl	%ecx,%eax
+	movb	%al,-1(%rdi,%r9,1)
+	decq	%rdx
+	jnz	.Loop_tail8xvl
+
+	vpxor	%ymm8,%ymm8,%ymm8
+	vmovdqa	%ymm8,0(%rsp)
+	vmovdqa	%ymm8,32(%rsp)
+
+.Ldone8xvl:
+	vzeroall
+	leaq	-8(%r10),%rsp
+.L8xvl_epilogue:
+	ret
+ENDPROC(chacha20_avx512vl)
+
+#endif /* CONFIG_AS_AVX512 */
diff --git b/net/wireguard/crypto/chacha20poly1305.c b/net/wireguard/crypto/chacha20poly1305.c
new file mode 100644
index 0000000..4c423ad
--- /dev/null
+++ b/net/wireguard/crypto/chacha20poly1305.c
@@ -0,0 +1,823 @@
+/* SPDX-License-Identifier: OpenSSL OR (BSD-3-Clause OR GPL-2.0)
+ *
+ * Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ * Copyright 2016 The OpenSSL Project Authors. All Rights Reserved.
+ */
+
+#include "chacha20poly1305.h"
+
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/version.h>
+#include <crypto/algapi.h>
+#include <crypto/scatterwalk.h>
+#include <asm/unaligned.h>
+
+#if defined(CONFIG_X86_64)
+#include <asm/cpufeature.h>
+#include <asm/processor.h>
+asmlinkage void poly1305_init_x86_64(void *ctx, const u8 key[16]);
+asmlinkage void poly1305_blocks_x86_64(void *ctx, const u8 *inp, size_t len, u32 padbit);
+asmlinkage void poly1305_emit_x86_64(void *ctx, u8 mac[16], const u32 nonce[4]);
+#ifdef CONFIG_AS_SSSE3
+asmlinkage void hchacha20_ssse3(u8 *derived_key, const u8 *nonce, const u8 *key);
+asmlinkage void chacha20_ssse3(u8 *out, const u8 *in, size_t len, const u32 key[8], const u32 counter[4]);
+#endif
+#ifdef CONFIG_AS_AVX
+asmlinkage void poly1305_emit_avx(void *ctx, u8 mac[16], const u32 nonce[4]);
+asmlinkage void poly1305_blocks_avx(void *ctx, const u8 *inp, size_t len, u32 padbit);
+#endif
+#ifdef CONFIG_AS_AVX2
+asmlinkage void chacha20_avx2(u8 *out, const u8 *in, size_t len, const u32 key[8], const u32 counter[4]);
+asmlinkage void poly1305_blocks_avx2(void *ctx, const u8 *inp, size_t len, u32 padbit);
+#endif
+#ifdef CONFIG_AS_AVX512
+asmlinkage void chacha20_avx512(u8 *out, const u8 *in, size_t len, const u32 key[8], const u32 counter[4]);
+asmlinkage void chacha20_avx512vl(u8 *out, const u8 *in, size_t len, const u32 key[8], const u32 counter[4]);
+asmlinkage void poly1305_blocks_avx512(void *ctx, const u8 *inp, size_t len, u32 padbit);
+#endif
+
+static bool chacha20poly1305_use_ssse3 __read_mostly;
+static bool chacha20poly1305_use_avx __read_mostly;
+static bool chacha20poly1305_use_avx2 __read_mostly;
+static bool chacha20poly1305_use_avx512 __read_mostly;
+
+void __init chacha20poly1305_fpu_init(void)
+{
+	chacha20poly1305_use_ssse3 = boot_cpu_has(X86_FEATURE_SSSE3);
+	chacha20poly1305_use_avx = boot_cpu_has(X86_FEATURE_AVX) && cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
+	chacha20poly1305_use_avx2 = boot_cpu_has(X86_FEATURE_AVX) && boot_cpu_has(X86_FEATURE_AVX2) && cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
+#ifndef COMPAT_CANNOT_USE_AVX512
+	chacha20poly1305_use_avx512 = boot_cpu_has(X86_FEATURE_AVX) && boot_cpu_has(X86_FEATURE_AVX2) && boot_cpu_has(X86_FEATURE_AVX512F) && cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM | XFEATURE_MASK_ZMM_Hi256, NULL);
+#endif
+}
+#elif defined(CONFIG_ARM) || defined(CONFIG_ARM64)
+asmlinkage void poly1305_init_arm(void *ctx, const u8 key[16]);
+asmlinkage void poly1305_blocks_arm(void *ctx, const u8 *inp, size_t len, u32 padbit);
+asmlinkage void poly1305_emit_arm(void *ctx, u8 mac[16], const u32 nonce[4]);
+asmlinkage void chacha20_arm(u8 *out, const u8 *in, size_t len, const u32 key[8], const u32 counter[4]);
+#if IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (!defined(__LINUX_ARM_ARCH__) || __LINUX_ARM_ARCH__ >= 7)
+#define ARM_USE_NEON
+#include <asm/hwcap.h>
+#include <asm/neon.h>
+asmlinkage void poly1305_blocks_neon(void *ctx, const u8 *inp, size_t len, u32 padbit);
+asmlinkage void poly1305_emit_neon(void *ctx, u8 mac[16], const u32 nonce[4]);
+asmlinkage void chacha20_neon(u8 *out, const u8 *in, size_t len, const u32 key[8], const u32 counter[4]);
+#endif
+static bool chacha20poly1305_use_neon __read_mostly;
+void __init chacha20poly1305_fpu_init(void)
+{
+#if defined(CONFIG_ARM64)
+	chacha20poly1305_use_neon = elf_hwcap & HWCAP_ASIMD;
+#elif defined(CONFIG_ARM)
+	chacha20poly1305_use_neon = elf_hwcap & HWCAP_NEON;
+#endif
+}
+#elif defined(CONFIG_MIPS) && defined(CONFIG_64BIT)
+asmlinkage void poly1305_init_mips(void *ctx, const u8 key[16]);
+asmlinkage void poly1305_blocks_mips(void *ctx, const u8 *inp, size_t len, u32 padbit);
+asmlinkage void poly1305_emit_mips(void *ctx, u8 mac[16], const u32 nonce[4]);
+void __init chacha20poly1305_fpu_init(void) { }
+#else
+void __init chacha20poly1305_fpu_init(void) { }
+#endif
+
+enum {
+	CHACHA20_IV_SIZE = 16,
+	CHACHA20_KEY_SIZE = 32,
+	CHACHA20_BLOCK_SIZE = 64,
+	POLY1305_BLOCK_SIZE = 16,
+	POLY1305_KEY_SIZE = 32,
+	POLY1305_MAC_SIZE = 16
+};
+
+static inline u32 le32_to_cpuvp(const void *p)
+{
+	return le32_to_cpup(p);
+}
+
+static inline u64 le64_to_cpuvp(const void *p)
+{
+	return le64_to_cpup(p);
+}
+
+static inline u32 rotl32(u32 v, u8 n)
+{
+	return (v << n) | (v >> (sizeof(v) * 8 - n));
+}
+
+struct chacha20_ctx {
+	u32 state[CHACHA20_BLOCK_SIZE / sizeof(u32)];
+} __aligned(32);
+
+#define QUARTER_ROUND(x, a, b, c, d) ( \
+	x[a] += x[b], \
+	x[d] = rotl32((x[d] ^ x[a]), 16), \
+	x[c] += x[d], \
+	x[b] = rotl32((x[b] ^ x[c]), 12), \
+	x[a] += x[b], \
+	x[d] = rotl32((x[d] ^ x[a]), 8), \
+	x[c] += x[d], \
+	x[b] = rotl32((x[b] ^ x[c]), 7) \
+)
+
+#define DOUBLE_ROUND(x) ( \
+	/* Column Round */ \
+	QUARTER_ROUND(x, 0, 4, 8, 12), \
+	QUARTER_ROUND(x, 1, 5, 9, 13), \
+	QUARTER_ROUND(x, 2, 6, 10, 14), \
+	QUARTER_ROUND(x, 3, 7, 11, 15), \
+	/* Diagonal Round */ \
+	QUARTER_ROUND(x, 0, 5, 10, 15), \
+	QUARTER_ROUND(x, 1, 6, 11, 12), \
+	QUARTER_ROUND(x, 2, 7, 8, 13), \
+	QUARTER_ROUND(x, 3, 4, 9, 14) \
+)
+
+#define TWENTY_ROUNDS(x) ( \
+	DOUBLE_ROUND(x), \
+	DOUBLE_ROUND(x), \
+	DOUBLE_ROUND(x), \
+	DOUBLE_ROUND(x), \
+	DOUBLE_ROUND(x), \
+	DOUBLE_ROUND(x), \
+	DOUBLE_ROUND(x), \
+	DOUBLE_ROUND(x), \
+	DOUBLE_ROUND(x), \
+	DOUBLE_ROUND(x) \
+)
+
+static void chacha20_block_generic(struct chacha20_ctx *ctx, void *stream)
+{
+	u32 x[CHACHA20_BLOCK_SIZE / sizeof(u32)];
+	__le32 *out = stream;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(x); i++)
+		x[i] = ctx->state[i];
+
+	TWENTY_ROUNDS(x);
+
+	for (i = 0; i < ARRAY_SIZE(x); i++)
+		out[i] = cpu_to_le32(x[i] + ctx->state[i]);
+
+	ctx->state[12]++;
+}
+
+static void hchacha20_generic(u8 derived_key[CHACHA20POLY1305_KEYLEN], const u8 nonce[16], const u8 key[CHACHA20POLY1305_KEYLEN])
+{
+	__le32 *out = (__force __le32 *)derived_key;
+	u32 x[] = {
+		0x61707865, 0x3320646e, 0x79622d32, 0x6b206574,
+		le32_to_cpuvp(key + 0), le32_to_cpuvp(key + 4), le32_to_cpuvp(key + 8), le32_to_cpuvp(key + 12),
+		le32_to_cpuvp(key + 16), le32_to_cpuvp(key + 20), le32_to_cpuvp(key + 24), le32_to_cpuvp(key + 28),
+		le32_to_cpuvp(nonce +  0), le32_to_cpuvp(nonce +  4), le32_to_cpuvp(nonce +  8), le32_to_cpuvp(nonce + 12)
+	};
+
+	TWENTY_ROUNDS(x);
+
+	out[0] = cpu_to_le32(x[0]);
+	out[1] = cpu_to_le32(x[1]);
+	out[2] = cpu_to_le32(x[2]);
+	out[3] = cpu_to_le32(x[3]);
+	out[4] = cpu_to_le32(x[12]);
+	out[5] = cpu_to_le32(x[13]);
+	out[6] = cpu_to_le32(x[14]);
+	out[7] = cpu_to_le32(x[15]);
+}
+
+static inline void hchacha20(u8 derived_key[CHACHA20POLY1305_KEYLEN], const u8 nonce[16], const u8 key[CHACHA20POLY1305_KEYLEN], bool have_simd)
+{
+#if defined(CONFIG_X86_64) && defined(CONFIG_AS_SSSE3)
+	if (have_simd && chacha20poly1305_use_ssse3) {
+		hchacha20_ssse3(derived_key, nonce, key);
+		return;
+	}
+#endif
+
+	hchacha20_generic(derived_key, nonce, key);
+}
+
+#define chacha20_initial_state(key, nonce) {{ \
+	0x61707865, 0x3320646e, 0x79622d32, 0x6b206574, \
+	le32_to_cpuvp((key) + 0), le32_to_cpuvp((key) + 4), le32_to_cpuvp((key) + 8), le32_to_cpuvp((key) + 12), \
+	le32_to_cpuvp((key) + 16), le32_to_cpuvp((key) + 20), le32_to_cpuvp((key) + 24), le32_to_cpuvp((key) + 28), \
+	0, 0, le32_to_cpuvp((nonce) +  0), le32_to_cpuvp((nonce) + 4) \
+}}
+
+static void chacha20_crypt(struct chacha20_ctx *ctx, u8 *dst, const u8 *src, u32 bytes, bool have_simd)
+{
+	u8 buf[CHACHA20_BLOCK_SIZE];
+
+	if (!have_simd
+#if defined(CONFIG_X86_64)
+		|| !chacha20poly1305_use_ssse3
+
+#elif defined(ARM_USE_NEON)
+		|| !chacha20poly1305_use_neon
+#endif
+	)
+		goto no_simd;
+
+#if defined(CONFIG_X86_64)
+#ifdef CONFIG_AS_AVX512
+	if (chacha20poly1305_use_avx512) {
+		chacha20_avx512(dst, src, bytes, &ctx->state[4], &ctx->state[12]);
+		ctx->state[12] += (bytes + 63) / 64;
+		return;
+	}
+#endif
+#ifdef CONFIG_AS_AVX2
+	if (chacha20poly1305_use_avx2) {
+		chacha20_avx2(dst, src, bytes, &ctx->state[4], &ctx->state[12]);
+		ctx->state[12] += (bytes + 63) / 64;
+		return;
+	}
+#endif
+#ifdef CONFIG_AS_SSSE3
+	chacha20_ssse3(dst, src, bytes, &ctx->state[4], &ctx->state[12]);
+	ctx->state[12] += (bytes + 63) / 64;
+	return;
+#endif
+#elif defined(ARM_USE_NEON)
+	chacha20_neon(dst, src, bytes, &ctx->state[4], &ctx->state[12]);
+	ctx->state[12] += (bytes + 63) / 64;
+	return;
+#endif
+
+no_simd:
+#if defined(CONFIG_ARM) || defined(CONFIG_ARM64)
+	chacha20_arm(dst, src, bytes, &ctx->state[4], &ctx->state[12]);
+	ctx->state[12] += (bytes + 63) / 64;
+	return;
+#endif
+
+	if (dst != src)
+		memcpy(dst, src, bytes);
+
+	while (bytes >= CHACHA20_BLOCK_SIZE) {
+		chacha20_block_generic(ctx, buf);
+		crypto_xor(dst, buf, CHACHA20_BLOCK_SIZE);
+		bytes -= CHACHA20_BLOCK_SIZE;
+		dst += CHACHA20_BLOCK_SIZE;
+	}
+	if (bytes) {
+		chacha20_block_generic(ctx, buf);
+		crypto_xor(dst, buf, bytes);
+	}
+}
+typedef void (*poly1305_blocks_f)(void *ctx, const u8 *inp, size_t len, u32 padbit);
+typedef void (*poly1305_emit_f)(void *ctx, u8 mac[16], const u32 nonce[4]);
+
+struct poly1305_ctx {
+	u8 opaque[24 * sizeof(u64)];
+	u32 nonce[4];
+	u8 data[POLY1305_BLOCK_SIZE];
+	size_t num;
+	struct {
+		poly1305_blocks_f blocks;
+		poly1305_emit_f emit;
+	} func;
+} __aligned(8);
+
+#if !(defined(CONFIG_X86_64) || defined(CONFIG_ARM) || defined(CONFIG_ARM64) || (defined(CONFIG_MIPS) && defined(CONFIG_64BIT)))
+struct poly1305_internal {
+	u32 h[5];
+	u32 r[4];
+};
+
+static void poly1305_init_generic(void *ctx, const u8 key[16])
+{
+	struct poly1305_internal *st = (struct poly1305_internal *)ctx;
+
+	/* h = 0 */
+	st->h[0] = 0;
+	st->h[1] = 0;
+	st->h[2] = 0;
+	st->h[3] = 0;
+	st->h[4] = 0;
+
+	/* r &= 0xffffffc0ffffffc0ffffffc0fffffff */
+	st->r[0] = le32_to_cpuvp(&key[ 0]) & 0x0fffffff;
+	st->r[1] = le32_to_cpuvp(&key[ 4]) & 0x0ffffffc;
+	st->r[2] = le32_to_cpuvp(&key[ 8]) & 0x0ffffffc;
+	st->r[3] = le32_to_cpuvp(&key[12]) & 0x0ffffffc;
+}
+
+static void poly1305_blocks_generic(void *ctx, const u8 *inp, size_t len, u32 padbit)
+{
+#define CONSTANT_TIME_CARRY(a,b) ((a ^ ((a ^ b) | ((a - b) ^ b))) >> (sizeof(a) * 8 - 1))
+	struct poly1305_internal *st = (struct poly1305_internal *)ctx;
+	u32 r0, r1, r2, r3;
+	u32 s1, s2, s3;
+	u32 h0, h1, h2, h3, h4, c;
+	u64 d0, d1, d2, d3;
+
+	r0 = st->r[0];
+	r1 = st->r[1];
+	r2 = st->r[2];
+	r3 = st->r[3];
+
+	s1 = r1 + (r1 >> 2);
+	s2 = r2 + (r2 >> 2);
+	s3 = r3 + (r3 >> 2);
+
+	h0 = st->h[0];
+	h1 = st->h[1];
+	h2 = st->h[2];
+	h3 = st->h[3];
+	h4 = st->h[4];
+
+	while (len >= POLY1305_BLOCK_SIZE) {
+		/* h += m[i] */
+		h0 = (u32)(d0 = (u64)h0 + le32_to_cpuvp(inp + 0));
+		h1 = (u32)(d1 = (u64)h1 + (d0 >> 32) + le32_to_cpuvp(inp + 4));
+		h2 = (u32)(d2 = (u64)h2 + (d1 >> 32) + le32_to_cpuvp(inp + 8));
+		h3 = (u32)(d3 = (u64)h3 + (d2 >> 32) + le32_to_cpuvp(inp + 12));
+		h4 += (u32)(d3 >> 32) + padbit;
+
+		/* h *= r "%" p, where "%" stands for "partial remainder" */
+		d0 = ((u64)h0 * r0) +
+		     ((u64)h1 * s3) +
+		     ((u64)h2 * s2) +
+		     ((u64)h3 * s1);
+		d1 = ((u64)h0 * r1) +
+		     ((u64)h1 * r0) +
+		     ((u64)h2 * s3) +
+		     ((u64)h3 * s2) +
+		     (h4 * s1);
+		d2 = ((u64)h0 * r2) +
+		     ((u64)h1 * r1) +
+		     ((u64)h2 * r0) +
+		     ((u64)h3 * s3) +
+		     (h4 * s2);
+		d3 = ((u64)h0 * r3) +
+		     ((u64)h1 * r2) +
+		     ((u64)h2 * r1) +
+		     ((u64)h3 * r0) +
+		     (h4 * s3);
+		h4 = (h4 * r0);
+
+		/* last reduction step: */
+		/* a) h4:h0 = h4<<128 + d3<<96 + d2<<64 + d1<<32 + d0 */
+		h0 = (u32)d0;
+		h1 = (u32)(d1 += d0 >> 32);
+		h2 = (u32)(d2 += d1 >> 32);
+		h3 = (u32)(d3 += d2 >> 32);
+		h4 += (u32)(d3 >> 32);
+		/* b) (h4:h0 += (h4:h0>>130) * 5) %= 2^130 */
+		c = (h4 >> 2) + (h4 & ~3U);
+		h4 &= 3;
+		h0 += c;
+		h1 += (c = CONSTANT_TIME_CARRY(h0,c));
+		h2 += (c = CONSTANT_TIME_CARRY(h1,c));
+		h3 += (c = CONSTANT_TIME_CARRY(h2,c));
+		h4 += CONSTANT_TIME_CARRY(h3,c);
+		/*
+		 * Occasional overflows to 3rd bit of h4 are taken care of
+		 * "naturally". If after this point we end up at the top of
+		 * this loop, then the overflow bit will be accounted for
+		 * in next iteration. If we end up in poly1305_emit, then
+		 * comparison to modulus below will still count as "carry
+		 * into 131st bit", so that properly reduced value will be
+		 * picked in conditional move.
+		 */
+
+		inp += POLY1305_BLOCK_SIZE;
+		len -= POLY1305_BLOCK_SIZE;
+	}
+
+	st->h[0] = h0;
+	st->h[1] = h1;
+	st->h[2] = h2;
+	st->h[3] = h3;
+	st->h[4] = h4;
+#undef CONSTANT_TIME_CARRY
+}
+
+static void poly1305_emit_generic(void *ctx, u8 mac[16], const u32 nonce[4])
+{
+	struct poly1305_internal *st = (struct poly1305_internal *)ctx;
+	__le32 *omac = (__force __le32 *)mac;
+	u32 h0, h1, h2, h3, h4;
+	u32 g0, g1, g2, g3, g4;
+	u64 t;
+	u32 mask;
+
+	h0 = st->h[0];
+	h1 = st->h[1];
+	h2 = st->h[2];
+	h3 = st->h[3];
+	h4 = st->h[4];
+
+	/* compare to modulus by computing h + -p */
+	g0 = (u32)(t = (u64)h0 + 5);
+	g1 = (u32)(t = (u64)h1 + (t >> 32));
+	g2 = (u32)(t = (u64)h2 + (t >> 32));
+	g3 = (u32)(t = (u64)h3 + (t >> 32));
+	g4 = h4 + (u32)(t >> 32);
+
+	/* if there was carry into 131st bit, h3:h0 = g3:g0 */
+	mask = 0 - (g4 >> 2);
+	g0 &= mask;
+	g1 &= mask;
+	g2 &= mask;
+	g3 &= mask;
+	mask = ~mask;
+	h0 = (h0 & mask) | g0;
+	h1 = (h1 & mask) | g1;
+	h2 = (h2 & mask) | g2;
+	h3 = (h3 & mask) | g3;
+
+	/* mac = (h + nonce) % (2^128) */
+	h0 = (u32)(t = (u64)h0 + nonce[0]);
+	h1 = (u32)(t = (u64)h1 + (t >> 32) + nonce[1]);
+	h2 = (u32)(t = (u64)h2 + (t >> 32) + nonce[2]);
+	h3 = (u32)(t = (u64)h3 + (t >> 32) + nonce[3]);
+
+	omac[0] = cpu_to_le32(h0);
+	omac[1] = cpu_to_le32(h1);
+	omac[2] = cpu_to_le32(h2);
+	omac[3] = cpu_to_le32(h3);
+}
+#endif
+
+static void poly1305_init(struct poly1305_ctx *ctx, const u8 key[POLY1305_KEY_SIZE], bool have_simd)
+{
+	ctx->nonce[0] = le32_to_cpuvp(&key[16]);
+	ctx->nonce[1] = le32_to_cpuvp(&key[20]);
+	ctx->nonce[2] = le32_to_cpuvp(&key[24]);
+	ctx->nonce[3] = le32_to_cpuvp(&key[28]);
+
+#if defined(CONFIG_X86_64)
+	poly1305_init_x86_64(ctx->opaque, key);
+	ctx->func.blocks = poly1305_blocks_x86_64;
+	ctx->func.emit = poly1305_emit_x86_64;
+#ifdef CONFIG_AS_AVX512
+	if(chacha20poly1305_use_avx512 && have_simd) {
+		ctx->func.blocks = poly1305_blocks_avx512;
+		ctx->func.emit = poly1305_emit_avx;
+	} else
+#endif
+#ifdef CONFIG_AS_AVX2
+	if (chacha20poly1305_use_avx2 && have_simd) {
+		ctx->func.blocks = poly1305_blocks_avx2;
+		ctx->func.emit = poly1305_emit_avx;
+	} else
+#endif
+#ifdef CONFIG_AS_AVX
+	if (chacha20poly1305_use_avx && have_simd) {
+		ctx->func.blocks = poly1305_blocks_avx;
+		ctx->func.emit = poly1305_emit_avx;
+	}
+#endif
+#elif defined(CONFIG_ARM) || defined(CONFIG_ARM64)
+	poly1305_init_arm(ctx->opaque, key);
+	ctx->func.blocks = poly1305_blocks_arm;
+	ctx->func.emit = poly1305_emit_arm;
+#if defined(ARM_USE_NEON)
+	if (chacha20poly1305_use_neon && have_simd) {
+		ctx->func.blocks = poly1305_blocks_neon;
+		ctx->func.emit = poly1305_emit_neon;
+	}
+#endif
+#elif defined(CONFIG_MIPS) && defined(CONFIG_64BIT)
+	poly1305_init_mips(ctx->opaque, key);
+	ctx->func.blocks = poly1305_blocks_mips;
+	ctx->func.emit = poly1305_emit_mips;
+#else
+	poly1305_init_generic(ctx->opaque, key);
+#endif
+	ctx->num = 0;
+}
+
+static void poly1305_update(struct poly1305_ctx *ctx, const u8 *inp, size_t len)
+{
+#if defined(CONFIG_X86_64) || defined(CONFIG_ARM) || defined(CONFIG_ARM64) || (defined(CONFIG_MIPS) && defined(CONFIG_64BIT))
+	const poly1305_blocks_f blocks = ctx->func.blocks;
+#else
+	const poly1305_blocks_f blocks = poly1305_blocks_generic;
+#endif
+
+	const size_t num = ctx->num;
+	size_t rem;;
+
+	if (num) {
+		rem = POLY1305_BLOCK_SIZE - num;
+		if (len >= rem) {
+			memcpy(ctx->data + num, inp, rem);
+			blocks(ctx->opaque, ctx->data, POLY1305_BLOCK_SIZE, 1);
+			inp += rem;
+			len -= rem;
+		} else {
+			/* Still not enough data to process a block. */
+			memcpy(ctx->data + num, inp, len);
+			ctx->num = num + len;
+			return;
+		}
+	}
+
+	rem = len % POLY1305_BLOCK_SIZE;
+	len -= rem;
+
+	if (len >= POLY1305_BLOCK_SIZE) {
+		blocks(ctx->opaque, inp, len, 1);
+		inp += len;
+	}
+
+	if (rem)
+		memcpy(ctx->data, inp, rem);
+
+	ctx->num = rem;
+}
+
+static void poly1305_finish(struct poly1305_ctx *ctx, u8 mac[16])
+{
+#if defined(CONFIG_X86_64) || defined(CONFIG_ARM) || defined(CONFIG_ARM64) || (defined(CONFIG_MIPS) && defined(CONFIG_64BIT))
+	const poly1305_blocks_f blocks = ctx->func.blocks;
+	const poly1305_emit_f emit = ctx->func.emit;
+#else
+	const poly1305_blocks_f blocks = poly1305_blocks_generic;
+	const poly1305_emit_f emit = poly1305_emit_generic;
+#endif
+	size_t num = ctx->num;
+
+	if (num) {
+		ctx->data[num++] = 1;   /* pad bit */
+		while (num < POLY1305_BLOCK_SIZE)
+			ctx->data[num++] = 0;
+		blocks(ctx->opaque, ctx->data, POLY1305_BLOCK_SIZE, 0);
+	}
+
+	emit(ctx->opaque, mac, ctx->nonce);
+
+	/* zero out the state */
+	memzero_explicit(ctx, sizeof(*ctx));
+}
+
+
+static const u8 pad0[16] = { 0 };
+
+static struct crypto_alg chacha20_alg = {
+	.cra_blocksize = 1,
+	.cra_alignmask = sizeof(u32) - 1
+};
+static struct crypto_blkcipher chacha20_cipher = {
+	.base = {
+		.__crt_alg = &chacha20_alg
+	}
+};
+static struct blkcipher_desc chacha20_desc = {
+	.tfm = &chacha20_cipher
+};
+
+static inline void __chacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len,
+					      const u8 *ad, const size_t ad_len,
+					      const u64 nonce, const u8 key[CHACHA20POLY1305_KEYLEN],
+					      bool have_simd)
+{
+	__le64 len, le_nonce = cpu_to_le64(nonce);
+	struct poly1305_ctx poly1305_state;
+	struct chacha20_ctx chacha20_state = chacha20_initial_state(key, (u8 *)&le_nonce);
+	u8 block0[CHACHA20_BLOCK_SIZE] = { 0 };
+
+	chacha20_crypt(&chacha20_state, block0, block0, sizeof(block0), have_simd);
+	poly1305_init(&poly1305_state, block0, have_simd);
+	memzero_explicit(block0, sizeof(block0));
+
+	poly1305_update(&poly1305_state, ad, ad_len);
+	poly1305_update(&poly1305_state, pad0, (0x10 - ad_len) & 0xf);
+
+	chacha20_crypt(&chacha20_state, dst, src, src_len, have_simd);
+
+	poly1305_update(&poly1305_state, dst, src_len);
+	poly1305_update(&poly1305_state, pad0, (0x10 - src_len) & 0xf);
+
+	len = cpu_to_le64(ad_len);
+	poly1305_update(&poly1305_state, (u8 *)&len, sizeof(len));
+
+	len = cpu_to_le64(src_len);
+	poly1305_update(&poly1305_state, (u8 *)&len, sizeof(len));
+
+	poly1305_finish(&poly1305_state, dst + src_len);
+
+	memzero_explicit(&chacha20_state, sizeof(chacha20_state));
+}
+
+void chacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len,
+			      const u8 *ad, const size_t ad_len,
+			      const u64 nonce, const u8 key[CHACHA20POLY1305_KEYLEN])
+{
+	bool have_simd;
+
+	have_simd = chacha20poly1305_init_simd();
+	__chacha20poly1305_encrypt(dst, src, src_len, ad, ad_len, nonce, key, have_simd);
+	chacha20poly1305_deinit_simd(have_simd);
+}
+
+bool chacha20poly1305_encrypt_sg(struct scatterlist *dst, struct scatterlist *src, const size_t src_len,
+				 const u8 *ad, const size_t ad_len,
+				 const u64 nonce, const u8 key[CHACHA20POLY1305_KEYLEN],
+				 bool have_simd)
+{
+	__le64 len, le_nonce = cpu_to_le64(nonce);
+	struct poly1305_ctx poly1305_state;
+	struct chacha20_ctx chacha20_state = chacha20_initial_state(key, (u8 *)&le_nonce);
+	int ret = 0;
+	struct blkcipher_walk walk;
+	u8 block0[CHACHA20_BLOCK_SIZE] = { 0 };
+	u8 mac[POLY1305_MAC_SIZE];
+
+	chacha20_crypt(&chacha20_state, block0, block0, sizeof(block0), have_simd);
+	poly1305_init(&poly1305_state, block0, have_simd);
+	memzero_explicit(block0, sizeof(block0));
+
+	poly1305_update(&poly1305_state, ad, ad_len);
+	poly1305_update(&poly1305_state, pad0, (0x10 - ad_len) & 0xf);
+
+	if (likely(src_len)) {
+		blkcipher_walk_init(&walk, dst, src, src_len);
+		ret = blkcipher_walk_virt_block(&chacha20_desc, &walk, CHACHA20_BLOCK_SIZE);
+		while (walk.nbytes >= CHACHA20_BLOCK_SIZE) {
+			size_t chunk_len = rounddown(walk.nbytes, CHACHA20_BLOCK_SIZE);
+
+			chacha20_crypt(&chacha20_state, walk.dst.virt.addr, walk.src.virt.addr, chunk_len, have_simd);
+			poly1305_update(&poly1305_state, walk.dst.virt.addr, chunk_len);
+			ret = blkcipher_walk_done(&chacha20_desc, &walk, walk.nbytes % CHACHA20_BLOCK_SIZE);
+		}
+		if (walk.nbytes) {
+			chacha20_crypt(&chacha20_state, walk.dst.virt.addr, walk.src.virt.addr, walk.nbytes, have_simd);
+			poly1305_update(&poly1305_state, walk.dst.virt.addr, walk.nbytes);
+			ret = blkcipher_walk_done(&chacha20_desc, &walk, 0);
+		}
+	}
+	if (unlikely(ret))
+		goto err;
+
+	poly1305_update(&poly1305_state, pad0, (0x10 - src_len) & 0xf);
+
+	len = cpu_to_le64(ad_len);
+	poly1305_update(&poly1305_state, (u8 *)&len, sizeof(len));
+
+	len = cpu_to_le64(src_len);
+	poly1305_update(&poly1305_state, (u8 *)&len, sizeof(len));
+
+	poly1305_finish(&poly1305_state, mac);
+	scatterwalk_map_and_copy(mac, dst, src_len, sizeof(mac), 1);
+err:
+	memzero_explicit(&chacha20_state, sizeof(chacha20_state));
+	memzero_explicit(mac, sizeof(mac));
+	return !ret;
+}
+
+static inline bool __chacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len,
+					      const u8 *ad, const size_t ad_len,
+					      const u64 nonce, const u8 key[CHACHA20POLY1305_KEYLEN],
+					      bool have_simd)
+{
+	__le64 len, le_nonce = cpu_to_le64(nonce);
+	struct poly1305_ctx poly1305_state;
+	struct chacha20_ctx chacha20_state = chacha20_initial_state(key, (u8 *)&le_nonce);
+	int ret;
+	u8 block0[CHACHA20_BLOCK_SIZE] = { 0 };
+	u8 mac[POLY1305_MAC_SIZE];
+	size_t dst_len;
+
+	if (unlikely(src_len < POLY1305_MAC_SIZE))
+		return false;
+
+	chacha20_crypt(&chacha20_state, block0, block0, sizeof(block0), have_simd);
+	poly1305_init(&poly1305_state, block0, have_simd);
+	memzero_explicit(block0, sizeof(block0));
+
+	poly1305_update(&poly1305_state, ad, ad_len);
+	poly1305_update(&poly1305_state, pad0, (0x10 - ad_len) & 0xf);
+
+	dst_len = src_len - POLY1305_MAC_SIZE;
+	poly1305_update(&poly1305_state, src, dst_len);
+	poly1305_update(&poly1305_state, pad0, (0x10 - dst_len) & 0xf);
+
+	len = cpu_to_le64(ad_len);
+	poly1305_update(&poly1305_state, (u8 *)&len, sizeof(len));
+
+	len = cpu_to_le64(dst_len);
+	poly1305_update(&poly1305_state, (u8 *)&len, sizeof(len));
+
+	poly1305_finish(&poly1305_state, mac);
+
+	ret = crypto_memneq(mac, src + dst_len, POLY1305_MAC_SIZE);
+	memzero_explicit(mac, POLY1305_MAC_SIZE);
+	if (likely(!ret))
+		chacha20_crypt(&chacha20_state, dst, src, dst_len, have_simd);
+
+	memzero_explicit(&chacha20_state, sizeof(chacha20_state));
+
+	return !ret;
+}
+
+bool chacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len,
+			      const u8 *ad, const size_t ad_len,
+			      const u64 nonce, const u8 key[CHACHA20POLY1305_KEYLEN])
+{
+	bool have_simd, ret;
+
+	have_simd = chacha20poly1305_init_simd();
+	ret = __chacha20poly1305_decrypt(dst, src, src_len, ad, ad_len, nonce, key, have_simd);
+	chacha20poly1305_deinit_simd(have_simd);
+	return ret;
+}
+
+bool chacha20poly1305_decrypt_sg(struct scatterlist *dst, struct scatterlist *src, const size_t src_len,
+				 const u8 *ad, const size_t ad_len,
+				 const u64 nonce, const u8 key[CHACHA20POLY1305_KEYLEN],
+				 bool have_simd)
+{
+	__le64 len, le_nonce = cpu_to_le64(nonce);
+	struct poly1305_ctx poly1305_state;
+	struct chacha20_ctx chacha20_state = chacha20_initial_state(key, (u8 *)&le_nonce);
+	struct blkcipher_walk walk;
+	int ret = 0;
+	u8 block0[CHACHA20_BLOCK_SIZE] = { 0 };
+	u8 read_mac[POLY1305_MAC_SIZE], computed_mac[POLY1305_MAC_SIZE];
+	size_t dst_len;
+
+	if (unlikely(src_len < POLY1305_MAC_SIZE))
+		return false;
+
+	chacha20_crypt(&chacha20_state, block0, block0, sizeof(block0), have_simd);
+	poly1305_init(&poly1305_state, block0, have_simd);
+	memzero_explicit(block0, sizeof(block0));
+
+	poly1305_update(&poly1305_state, ad, ad_len);
+	poly1305_update(&poly1305_state, pad0, (0x10 - ad_len) & 0xf);
+
+	dst_len = src_len - POLY1305_MAC_SIZE;
+	if (likely(dst_len)) {
+		blkcipher_walk_init(&walk, dst, src, dst_len);
+		ret = blkcipher_walk_virt_block(&chacha20_desc, &walk, CHACHA20_BLOCK_SIZE);
+		while (walk.nbytes >= CHACHA20_BLOCK_SIZE) {
+			size_t chunk_len = rounddown(walk.nbytes, CHACHA20_BLOCK_SIZE);
+
+			poly1305_update(&poly1305_state, walk.src.virt.addr, chunk_len);
+			chacha20_crypt(&chacha20_state, walk.dst.virt.addr, walk.src.virt.addr, chunk_len, have_simd);
+			ret = blkcipher_walk_done(&chacha20_desc, &walk, walk.nbytes % CHACHA20_BLOCK_SIZE);
+		}
+		if (walk.nbytes) {
+			poly1305_update(&poly1305_state, walk.src.virt.addr, walk.nbytes);
+			chacha20_crypt(&chacha20_state, walk.dst.virt.addr, walk.src.virt.addr, walk.nbytes, have_simd);
+			ret = blkcipher_walk_done(&chacha20_desc, &walk, 0);
+		}
+	}
+	if (unlikely(ret))
+		goto err;
+
+	poly1305_update(&poly1305_state, pad0, (0x10 - dst_len) & 0xf);
+
+	len = cpu_to_le64(ad_len);
+	poly1305_update(&poly1305_state, (u8 *)&len, sizeof(len));
+
+	len = cpu_to_le64(dst_len);
+	poly1305_update(&poly1305_state, (u8 *)&len, sizeof(len));
+
+	poly1305_finish(&poly1305_state, computed_mac);
+
+	scatterwalk_map_and_copy(read_mac, src, dst_len, POLY1305_MAC_SIZE, 0);
+	ret = crypto_memneq(read_mac, computed_mac, POLY1305_MAC_SIZE);
+err:
+	memzero_explicit(read_mac, POLY1305_MAC_SIZE);
+	memzero_explicit(computed_mac, POLY1305_MAC_SIZE);
+	memzero_explicit(&chacha20_state, sizeof(chacha20_state));
+	return !ret;
+}
+
+
+void xchacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len,
+			       const u8 *ad, const size_t ad_len,
+			       const u8 nonce[XCHACHA20POLY1305_NONCELEN],
+			       const u8 key[CHACHA20POLY1305_KEYLEN])
+{
+	bool have_simd = chacha20poly1305_init_simd();
+	u8 derived_key[CHACHA20POLY1305_KEYLEN] __aligned(16);
+
+	hchacha20(derived_key, nonce, key, have_simd);
+	__chacha20poly1305_encrypt(dst, src, src_len, ad, ad_len, le64_to_cpuvp(nonce + 16), derived_key, have_simd);
+	memzero_explicit(derived_key, CHACHA20POLY1305_KEYLEN);
+	chacha20poly1305_deinit_simd(have_simd);
+}
+
+bool xchacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len,
+			       const u8 *ad, const size_t ad_len,
+			       const u8 nonce[XCHACHA20POLY1305_NONCELEN],
+			       const u8 key[CHACHA20POLY1305_KEYLEN])
+{
+	bool ret, have_simd = chacha20poly1305_init_simd();
+	u8 derived_key[CHACHA20POLY1305_KEYLEN] __aligned(16);
+
+	hchacha20(derived_key, nonce, key, have_simd);
+	ret = __chacha20poly1305_decrypt(dst, src, src_len, ad, ad_len, le64_to_cpuvp(nonce + 16), derived_key, have_simd);
+	memzero_explicit(derived_key, CHACHA20POLY1305_KEYLEN);
+	chacha20poly1305_deinit_simd(have_simd);
+	return ret;
+}
+
+#include "../selftest/chacha20poly1305.h"
diff --git b/net/wireguard/crypto/chacha20poly1305.h b/net/wireguard/crypto/chacha20poly1305.h
new file mode 100644
index 0000000..1e54594
--- /dev/null
+++ b/net/wireguard/crypto/chacha20poly1305.h
@@ -0,0 +1,92 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#ifndef _WG_CHACHA20POLY1305_H
+#define _WG_CHACHA20POLY1305_H
+
+#include <linux/types.h>
+
+struct scatterlist;
+
+enum chacha20poly1305_lengths {
+	XCHACHA20POLY1305_NONCELEN = 24,
+	CHACHA20POLY1305_KEYLEN = 32,
+	CHACHA20POLY1305_AUTHTAGLEN = 16
+};
+
+void chacha20poly1305_fpu_init(void);
+
+void chacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len,
+			      const u8 *ad, const size_t ad_len,
+			      const u64 nonce, const u8 key[CHACHA20POLY1305_KEYLEN]);
+
+bool __must_check chacha20poly1305_encrypt_sg(struct scatterlist *dst, struct scatterlist *src, const size_t src_len,
+					      const u8 *ad, const size_t ad_len,
+					      const u64 nonce, const u8 key[CHACHA20POLY1305_KEYLEN],
+					      bool have_simd);
+
+bool __must_check chacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len,
+					   const u8 *ad, const size_t ad_len,
+					   const u64 nonce, const u8 key[CHACHA20POLY1305_KEYLEN]);
+
+bool __must_check chacha20poly1305_decrypt_sg(struct scatterlist *dst, struct scatterlist *src, const size_t src_len,
+					      const u8 *ad, const size_t ad_len,
+					      const u64 nonce, const u8 key[CHACHA20POLY1305_KEYLEN],
+					      bool have_simd);
+
+void xchacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len,
+			       const u8 *ad, const size_t ad_len,
+			       const u8 nonce[XCHACHA20POLY1305_NONCELEN],
+			       const u8 key[CHACHA20POLY1305_KEYLEN]);
+
+bool __must_check xchacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len,
+					    const u8 *ad, const size_t ad_len,
+					    const u8 nonce[XCHACHA20POLY1305_NONCELEN],
+					    const u8 key[CHACHA20POLY1305_KEYLEN]);
+
+#if defined(CONFIG_X86_64)
+#include <linux/version.h>
+#include <asm/fpu/api.h>
+#include <asm/simd.h>
+#elif IS_ENABLED(CONFIG_KERNEL_MODE_NEON)
+#include <asm/neon.h>
+#include <asm/simd.h>
+#endif
+
+static inline bool chacha20poly1305_init_simd(void)
+{
+	bool have_simd = false;
+#if defined(CONFIG_X86_64)
+	have_simd = irq_fpu_usable();
+	if (have_simd)
+		kernel_fpu_begin();
+#elif IS_ENABLED(CONFIG_KERNEL_MODE_NEON)
+#if defined(CONFIG_ARM64)
+	have_simd = true; /* ARM64 supports NEON in any context. */
+#elif defined(CONFIG_ARM)
+	have_simd = may_use_simd(); /* ARM doesn't support NEON in interrupt context. */
+#endif
+	if (have_simd)
+		kernel_neon_begin();
+#endif
+	return have_simd;
+}
+
+static inline void chacha20poly1305_deinit_simd(bool was_on)
+{
+#if defined(CONFIG_X86_64)
+	if (was_on)
+		kernel_fpu_end();
+#elif IS_ENABLED(CONFIG_KERNEL_MODE_NEON)
+	if (was_on)
+		kernel_neon_end();
+#endif
+}
+
+#ifdef DEBUG
+bool chacha20poly1305_selftest(void);
+#endif
+
+#endif /* _WG_CHACHA20POLY1305_H */
diff --git b/net/wireguard/crypto/curve25519-arm.S b/net/wireguard/crypto/curve25519-arm.S
new file mode 100644
index 0000000..0696926
--- /dev/null
+++ b/net/wireguard/crypto/curve25519-arm.S
@@ -0,0 +1,2110 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ *
+ * Based on algorithms from Daniel J. Bernstein and Peter Schwabe.
+ */
+
+#if IS_ENABLED(CONFIG_KERNEL_MODE_NEON)
+
+#include <linux/linkage.h>
+
+	.text
+	.fpu		neon
+	.align		4
+
+ENTRY(curve25519_neon)
+	vpush {q4,q5,q6,q7}
+	mov r12,sp
+	sub r3,sp,#736
+	and r3,r3,#0xffffffe0
+	mov sp,r3
+	strd r4,[sp,#0]
+	strd r6,[sp,#8]
+	strd r8,[sp,#16]
+	strd r10,[sp,#24]
+	str r12,[sp,#480]
+	str r14,[sp,#484]
+	mov r0,r0
+	mov r1,r1
+	mov r2,r2
+	add r3,sp,#32
+	ldr r4,=0
+	ldr r5,=254
+	vmov.i32 q0,#1
+	vshr.u64 q1,q0,#7
+	vshr.u64 q0,q0,#8
+	vmov.i32 d4,#19
+	vmov.i32 d5,#38
+	add r6,sp,#512
+	vst1.8 {d2-d3},[r6,: 128]
+	add r6,sp,#528
+	vst1.8 {d0-d1},[r6,: 128]
+	add r6,sp,#544
+	vst1.8 {d4-d5},[r6,: 128]
+	add r6,r3,#0
+	vmov.i32 q2,#0
+	vst1.8 {d4-d5},[r6,: 128]!
+	vst1.8 {d4-d5},[r6,: 128]!
+	vst1.8 d4,[r6,: 64]
+	add r6,r3,#0
+	ldr r7,=960
+	sub r7,r7,#2
+	neg r7,r7
+	sub r7,r7,r7,LSL #7
+	str r7,[r6]
+	add r6,sp,#704
+	vld1.8 {d4-d5},[r1]!
+	vld1.8 {d6-d7},[r1]
+	vst1.8 {d4-d5},[r6,: 128]!
+	vst1.8 {d6-d7},[r6,: 128]
+	sub r1,r6,#16
+	ldrb r6,[r1]
+	and r6,r6,#248
+	strb r6,[r1]
+	ldrb r6,[r1,#31]
+	and r6,r6,#127
+	orr r6,r6,#64
+	strb r6,[r1,#31]
+	vmov.i64 q2,#0xffffffff
+	vshr.u64 q3,q2,#7
+	vshr.u64 q2,q2,#6
+	vld1.8 {d8},[r2]
+	vld1.8 {d10},[r2]
+	add r2,r2,#6
+	vld1.8 {d12},[r2]
+	vld1.8 {d14},[r2]
+	add r2,r2,#6
+	vld1.8 {d16},[r2]
+	add r2,r2,#4
+	vld1.8 {d18},[r2]
+	vld1.8 {d20},[r2]
+	add r2,r2,#6
+	vld1.8 {d22},[r2]
+	add r2,r2,#2
+	vld1.8 {d24},[r2]
+	vld1.8 {d26},[r2]
+	vshr.u64 q5,q5,#26
+	vshr.u64 q6,q6,#3
+	vshr.u64 q7,q7,#29
+	vshr.u64 q8,q8,#6
+	vshr.u64 q10,q10,#25
+	vshr.u64 q11,q11,#3
+	vshr.u64 q12,q12,#12
+	vshr.u64 q13,q13,#38
+	vand q4,q4,q2
+	vand q6,q6,q2
+	vand q8,q8,q2
+	vand q10,q10,q2
+	vand q2,q12,q2
+	vand q5,q5,q3
+	vand q7,q7,q3
+	vand q9,q9,q3
+	vand q11,q11,q3
+	vand q3,q13,q3
+	add r2,r3,#48
+	vadd.i64 q12,q4,q1
+	vadd.i64 q13,q10,q1
+	vshr.s64 q12,q12,#26
+	vshr.s64 q13,q13,#26
+	vadd.i64 q5,q5,q12
+	vshl.i64 q12,q12,#26
+	vadd.i64 q14,q5,q0
+	vadd.i64 q11,q11,q13
+	vshl.i64 q13,q13,#26
+	vadd.i64 q15,q11,q0
+	vsub.i64 q4,q4,q12
+	vshr.s64 q12,q14,#25
+	vsub.i64 q10,q10,q13
+	vshr.s64 q13,q15,#25
+	vadd.i64 q6,q6,q12
+	vshl.i64 q12,q12,#25
+	vadd.i64 q14,q6,q1
+	vadd.i64 q2,q2,q13
+	vsub.i64 q5,q5,q12
+	vshr.s64 q12,q14,#26
+	vshl.i64 q13,q13,#25
+	vadd.i64 q14,q2,q1
+	vadd.i64 q7,q7,q12
+	vshl.i64 q12,q12,#26
+	vadd.i64 q15,q7,q0
+	vsub.i64 q11,q11,q13
+	vshr.s64 q13,q14,#26
+	vsub.i64 q6,q6,q12
+	vshr.s64 q12,q15,#25
+	vadd.i64 q3,q3,q13
+	vshl.i64 q13,q13,#26
+	vadd.i64 q14,q3,q0
+	vadd.i64 q8,q8,q12
+	vshl.i64 q12,q12,#25
+	vadd.i64 q15,q8,q1
+	add r2,r2,#8
+	vsub.i64 q2,q2,q13
+	vshr.s64 q13,q14,#25
+	vsub.i64 q7,q7,q12
+	vshr.s64 q12,q15,#26
+	vadd.i64 q14,q13,q13
+	vadd.i64 q9,q9,q12
+	vtrn.32 d12,d14
+	vshl.i64 q12,q12,#26
+	vtrn.32 d13,d15
+	vadd.i64 q0,q9,q0
+	vadd.i64 q4,q4,q14
+	vst1.8 d12,[r2,: 64]!
+	vshl.i64 q6,q13,#4
+	vsub.i64 q7,q8,q12
+	vshr.s64 q0,q0,#25
+	vadd.i64 q4,q4,q6
+	vadd.i64 q6,q10,q0
+	vshl.i64 q0,q0,#25
+	vadd.i64 q8,q6,q1
+	vadd.i64 q4,q4,q13
+	vshl.i64 q10,q13,#25
+	vadd.i64 q1,q4,q1
+	vsub.i64 q0,q9,q0
+	vshr.s64 q8,q8,#26
+	vsub.i64 q3,q3,q10
+	vtrn.32 d14,d0
+	vshr.s64 q1,q1,#26
+	vtrn.32 d15,d1
+	vadd.i64 q0,q11,q8
+	vst1.8 d14,[r2,: 64]
+	vshl.i64 q7,q8,#26
+	vadd.i64 q5,q5,q1
+	vtrn.32 d4,d6
+	vshl.i64 q1,q1,#26
+	vtrn.32 d5,d7
+	vsub.i64 q3,q6,q7
+	add r2,r2,#16
+	vsub.i64 q1,q4,q1
+	vst1.8 d4,[r2,: 64]
+	vtrn.32 d6,d0
+	vtrn.32 d7,d1
+	sub r2,r2,#8
+	vtrn.32 d2,d10
+	vtrn.32 d3,d11
+	vst1.8 d6,[r2,: 64]
+	sub r2,r2,#24
+	vst1.8 d2,[r2,: 64]
+	add r2,r3,#96
+	vmov.i32 q0,#0
+	vmov.i64 d2,#0xff
+	vmov.i64 d3,#0
+	vshr.u32 q1,q1,#7
+	vst1.8 {d2-d3},[r2,: 128]!
+	vst1.8 {d0-d1},[r2,: 128]!
+	vst1.8 d0,[r2,: 64]
+	add r2,r3,#144
+	vmov.i32 q0,#0
+	vst1.8 {d0-d1},[r2,: 128]!
+	vst1.8 {d0-d1},[r2,: 128]!
+	vst1.8 d0,[r2,: 64]
+	add r2,r3,#240
+	vmov.i32 q0,#0
+	vmov.i64 d2,#0xff
+	vmov.i64 d3,#0
+	vshr.u32 q1,q1,#7
+	vst1.8 {d2-d3},[r2,: 128]!
+	vst1.8 {d0-d1},[r2,: 128]!
+	vst1.8 d0,[r2,: 64]
+	add r2,r3,#48
+	add r6,r3,#192
+	vld1.8 {d0-d1},[r2,: 128]!
+	vld1.8 {d2-d3},[r2,: 128]!
+	vld1.8 {d4},[r2,: 64]
+	vst1.8 {d0-d1},[r6,: 128]!
+	vst1.8 {d2-d3},[r6,: 128]!
+	vst1.8 d4,[r6,: 64]
+	.Lmainloop:
+	mov r2,r5,LSR #3
+	and r6,r5,#7
+	ldrb r2,[r1,r2]
+	mov r2,r2,LSR r6
+	and r2,r2,#1
+	str r5,[sp,#488]
+	eor r4,r4,r2
+	str r2,[sp,#492]
+	neg r2,r4
+	add r4,r3,#96
+	add r5,r3,#192
+	add r6,r3,#144
+	vld1.8 {d8-d9},[r4,: 128]!
+	add r7,r3,#240
+	vld1.8 {d10-d11},[r5,: 128]!
+	veor q6,q4,q5
+	vld1.8 {d14-d15},[r6,: 128]!
+	vdup.i32 q8,r2
+	vld1.8 {d18-d19},[r7,: 128]!
+	veor q10,q7,q9
+	vld1.8 {d22-d23},[r4,: 128]!
+	vand q6,q6,q8
+	vld1.8 {d24-d25},[r5,: 128]!
+	vand q10,q10,q8
+	vld1.8 {d26-d27},[r6,: 128]!
+	veor q4,q4,q6
+	vld1.8 {d28-d29},[r7,: 128]!
+	veor q5,q5,q6
+	vld1.8 {d0},[r4,: 64]
+	veor q6,q7,q10
+	vld1.8 {d2},[r5,: 64]
+	veor q7,q9,q10
+	vld1.8 {d4},[r6,: 64]
+	veor q9,q11,q12
+	vld1.8 {d6},[r7,: 64]
+	veor q10,q0,q1
+	sub r2,r4,#32
+	vand q9,q9,q8
+	sub r4,r5,#32
+	vand q10,q10,q8
+	sub r5,r6,#32
+	veor q11,q11,q9
+	sub r6,r7,#32
+	veor q0,q0,q10
+	veor q9,q12,q9
+	veor q1,q1,q10
+	veor q10,q13,q14
+	veor q12,q2,q3
+	vand q10,q10,q8
+	vand q8,q12,q8
+	veor q12,q13,q10
+	veor q2,q2,q8
+	veor q10,q14,q10
+	veor q3,q3,q8
+	vadd.i32 q8,q4,q6
+	vsub.i32 q4,q4,q6
+	vst1.8 {d16-d17},[r2,: 128]!
+	vadd.i32 q6,q11,q12
+	vst1.8 {d8-d9},[r5,: 128]!
+	vsub.i32 q4,q11,q12
+	vst1.8 {d12-d13},[r2,: 128]!
+	vadd.i32 q6,q0,q2
+	vst1.8 {d8-d9},[r5,: 128]!
+	vsub.i32 q0,q0,q2
+	vst1.8 d12,[r2,: 64]
+	vadd.i32 q2,q5,q7
+	vst1.8 d0,[r5,: 64]
+	vsub.i32 q0,q5,q7
+	vst1.8 {d4-d5},[r4,: 128]!
+	vadd.i32 q2,q9,q10
+	vst1.8 {d0-d1},[r6,: 128]!
+	vsub.i32 q0,q9,q10
+	vst1.8 {d4-d5},[r4,: 128]!
+	vadd.i32 q2,q1,q3
+	vst1.8 {d0-d1},[r6,: 128]!
+	vsub.i32 q0,q1,q3
+	vst1.8 d4,[r4,: 64]
+	vst1.8 d0,[r6,: 64]
+	add r2,sp,#544
+	add r4,r3,#96
+	add r5,r3,#144
+	vld1.8 {d0-d1},[r2,: 128]
+	vld1.8 {d2-d3},[r4,: 128]!
+	vld1.8 {d4-d5},[r5,: 128]!
+	vzip.i32 q1,q2
+	vld1.8 {d6-d7},[r4,: 128]!
+	vld1.8 {d8-d9},[r5,: 128]!
+	vshl.i32 q5,q1,#1
+	vzip.i32 q3,q4
+	vshl.i32 q6,q2,#1
+	vld1.8 {d14},[r4,: 64]
+	vshl.i32 q8,q3,#1
+	vld1.8 {d15},[r5,: 64]
+	vshl.i32 q9,q4,#1
+	vmul.i32 d21,d7,d1
+	vtrn.32 d14,d15
+	vmul.i32 q11,q4,q0
+	vmul.i32 q0,q7,q0
+	vmull.s32 q12,d2,d2
+	vmlal.s32 q12,d11,d1
+	vmlal.s32 q12,d12,d0
+	vmlal.s32 q12,d13,d23
+	vmlal.s32 q12,d16,d22
+	vmlal.s32 q12,d7,d21
+	vmull.s32 q10,d2,d11
+	vmlal.s32 q10,d4,d1
+	vmlal.s32 q10,d13,d0
+	vmlal.s32 q10,d6,d23
+	vmlal.s32 q10,d17,d22
+	vmull.s32 q13,d10,d4
+	vmlal.s32 q13,d11,d3
+	vmlal.s32 q13,d13,d1
+	vmlal.s32 q13,d16,d0
+	vmlal.s32 q13,d17,d23
+	vmlal.s32 q13,d8,d22
+	vmull.s32 q1,d10,d5
+	vmlal.s32 q1,d11,d4
+	vmlal.s32 q1,d6,d1
+	vmlal.s32 q1,d17,d0
+	vmlal.s32 q1,d8,d23
+	vmull.s32 q14,d10,d6
+	vmlal.s32 q14,d11,d13
+	vmlal.s32 q14,d4,d4
+	vmlal.s32 q14,d17,d1
+	vmlal.s32 q14,d18,d0
+	vmlal.s32 q14,d9,d23
+	vmull.s32 q11,d10,d7
+	vmlal.s32 q11,d11,d6
+	vmlal.s32 q11,d12,d5
+	vmlal.s32 q11,d8,d1
+	vmlal.s32 q11,d19,d0
+	vmull.s32 q15,d10,d8
+	vmlal.s32 q15,d11,d17
+	vmlal.s32 q15,d12,d6
+	vmlal.s32 q15,d13,d5
+	vmlal.s32 q15,d19,d1
+	vmlal.s32 q15,d14,d0
+	vmull.s32 q2,d10,d9
+	vmlal.s32 q2,d11,d8
+	vmlal.s32 q2,d12,d7
+	vmlal.s32 q2,d13,d6
+	vmlal.s32 q2,d14,d1
+	vmull.s32 q0,d15,d1
+	vmlal.s32 q0,d10,d14
+	vmlal.s32 q0,d11,d19
+	vmlal.s32 q0,d12,d8
+	vmlal.s32 q0,d13,d17
+	vmlal.s32 q0,d6,d6
+	add r2,sp,#512
+	vld1.8 {d18-d19},[r2,: 128]
+	vmull.s32 q3,d16,d7
+	vmlal.s32 q3,d10,d15
+	vmlal.s32 q3,d11,d14
+	vmlal.s32 q3,d12,d9
+	vmlal.s32 q3,d13,d8
+	add r2,sp,#528
+	vld1.8 {d8-d9},[r2,: 128]
+	vadd.i64 q5,q12,q9
+	vadd.i64 q6,q15,q9
+	vshr.s64 q5,q5,#26
+	vshr.s64 q6,q6,#26
+	vadd.i64 q7,q10,q5
+	vshl.i64 q5,q5,#26
+	vadd.i64 q8,q7,q4
+	vadd.i64 q2,q2,q6
+	vshl.i64 q6,q6,#26
+	vadd.i64 q10,q2,q4
+	vsub.i64 q5,q12,q5
+	vshr.s64 q8,q8,#25
+	vsub.i64 q6,q15,q6
+	vshr.s64 q10,q10,#25
+	vadd.i64 q12,q13,q8
+	vshl.i64 q8,q8,#25
+	vadd.i64 q13,q12,q9
+	vadd.i64 q0,q0,q10
+	vsub.i64 q7,q7,q8
+	vshr.s64 q8,q13,#26
+	vshl.i64 q10,q10,#25
+	vadd.i64 q13,q0,q9
+	vadd.i64 q1,q1,q8
+	vshl.i64 q8,q8,#26
+	vadd.i64 q15,q1,q4
+	vsub.i64 q2,q2,q10
+	vshr.s64 q10,q13,#26
+	vsub.i64 q8,q12,q8
+	vshr.s64 q12,q15,#25
+	vadd.i64 q3,q3,q10
+	vshl.i64 q10,q10,#26
+	vadd.i64 q13,q3,q4
+	vadd.i64 q14,q14,q12
+	add r2,r3,#288
+	vshl.i64 q12,q12,#25
+	add r4,r3,#336
+	vadd.i64 q15,q14,q9
+	add r2,r2,#8
+	vsub.i64 q0,q0,q10
+	add r4,r4,#8
+	vshr.s64 q10,q13,#25
+	vsub.i64 q1,q1,q12
+	vshr.s64 q12,q15,#26
+	vadd.i64 q13,q10,q10
+	vadd.i64 q11,q11,q12
+	vtrn.32 d16,d2
+	vshl.i64 q12,q12,#26
+	vtrn.32 d17,d3
+	vadd.i64 q1,q11,q4
+	vadd.i64 q4,q5,q13
+	vst1.8 d16,[r2,: 64]!
+	vshl.i64 q5,q10,#4
+	vst1.8 d17,[r4,: 64]!
+	vsub.i64 q8,q14,q12
+	vshr.s64 q1,q1,#25
+	vadd.i64 q4,q4,q5
+	vadd.i64 q5,q6,q1
+	vshl.i64 q1,q1,#25
+	vadd.i64 q6,q5,q9
+	vadd.i64 q4,q4,q10
+	vshl.i64 q10,q10,#25
+	vadd.i64 q9,q4,q9
+	vsub.i64 q1,q11,q1
+	vshr.s64 q6,q6,#26
+	vsub.i64 q3,q3,q10
+	vtrn.32 d16,d2
+	vshr.s64 q9,q9,#26
+	vtrn.32 d17,d3
+	vadd.i64 q1,q2,q6
+	vst1.8 d16,[r2,: 64]
+	vshl.i64 q2,q6,#26
+	vst1.8 d17,[r4,: 64]
+	vadd.i64 q6,q7,q9
+	vtrn.32 d0,d6
+	vshl.i64 q7,q9,#26
+	vtrn.32 d1,d7
+	vsub.i64 q2,q5,q2
+	add r2,r2,#16
+	vsub.i64 q3,q4,q7
+	vst1.8 d0,[r2,: 64]
+	add r4,r4,#16
+	vst1.8 d1,[r4,: 64]
+	vtrn.32 d4,d2
+	vtrn.32 d5,d3
+	sub r2,r2,#8
+	sub r4,r4,#8
+	vtrn.32 d6,d12
+	vtrn.32 d7,d13
+	vst1.8 d4,[r2,: 64]
+	vst1.8 d5,[r4,: 64]
+	sub r2,r2,#24
+	sub r4,r4,#24
+	vst1.8 d6,[r2,: 64]
+	vst1.8 d7,[r4,: 64]
+	add r2,r3,#240
+	add r4,r3,#96
+	vld1.8 {d0-d1},[r4,: 128]!
+	vld1.8 {d2-d3},[r4,: 128]!
+	vld1.8 {d4},[r4,: 64]
+	add r4,r3,#144
+	vld1.8 {d6-d7},[r4,: 128]!
+	vtrn.32 q0,q3
+	vld1.8 {d8-d9},[r4,: 128]!
+	vshl.i32 q5,q0,#4
+	vtrn.32 q1,q4
+	vshl.i32 q6,q3,#4
+	vadd.i32 q5,q5,q0
+	vadd.i32 q6,q6,q3
+	vshl.i32 q7,q1,#4
+	vld1.8 {d5},[r4,: 64]
+	vshl.i32 q8,q4,#4
+	vtrn.32 d4,d5
+	vadd.i32 q7,q7,q1
+	vadd.i32 q8,q8,q4
+	vld1.8 {d18-d19},[r2,: 128]!
+	vshl.i32 q10,q2,#4
+	vld1.8 {d22-d23},[r2,: 128]!
+	vadd.i32 q10,q10,q2
+	vld1.8 {d24},[r2,: 64]
+	vadd.i32 q5,q5,q0
+	add r2,r3,#192
+	vld1.8 {d26-d27},[r2,: 128]!
+	vadd.i32 q6,q6,q3
+	vld1.8 {d28-d29},[r2,: 128]!
+	vadd.i32 q8,q8,q4
+	vld1.8 {d25},[r2,: 64]
+	vadd.i32 q10,q10,q2
+	vtrn.32 q9,q13
+	vadd.i32 q7,q7,q1
+	vadd.i32 q5,q5,q0
+	vtrn.32 q11,q14
+	vadd.i32 q6,q6,q3
+	add r2,sp,#560
+	vadd.i32 q10,q10,q2
+	vtrn.32 d24,d25
+	vst1.8 {d12-d13},[r2,: 128]
+	vshl.i32 q6,q13,#1
+	add r2,sp,#576
+	vst1.8 {d20-d21},[r2,: 128]
+	vshl.i32 q10,q14,#1
+	add r2,sp,#592
+	vst1.8 {d12-d13},[r2,: 128]
+	vshl.i32 q15,q12,#1
+	vadd.i32 q8,q8,q4
+	vext.32 d10,d31,d30,#0
+	vadd.i32 q7,q7,q1
+	add r2,sp,#608
+	vst1.8 {d16-d17},[r2,: 128]
+	vmull.s32 q8,d18,d5
+	vmlal.s32 q8,d26,d4
+	vmlal.s32 q8,d19,d9
+	vmlal.s32 q8,d27,d3
+	vmlal.s32 q8,d22,d8
+	vmlal.s32 q8,d28,d2
+	vmlal.s32 q8,d23,d7
+	vmlal.s32 q8,d29,d1
+	vmlal.s32 q8,d24,d6
+	vmlal.s32 q8,d25,d0
+	add r2,sp,#624
+	vst1.8 {d14-d15},[r2,: 128]
+	vmull.s32 q2,d18,d4
+	vmlal.s32 q2,d12,d9
+	vmlal.s32 q2,d13,d8
+	vmlal.s32 q2,d19,d3
+	vmlal.s32 q2,d22,d2
+	vmlal.s32 q2,d23,d1
+	vmlal.s32 q2,d24,d0
+	add r2,sp,#640
+	vst1.8 {d20-d21},[r2,: 128]
+	vmull.s32 q7,d18,d9
+	vmlal.s32 q7,d26,d3
+	vmlal.s32 q7,d19,d8
+	vmlal.s32 q7,d27,d2
+	vmlal.s32 q7,d22,d7
+	vmlal.s32 q7,d28,d1
+	vmlal.s32 q7,d23,d6
+	vmlal.s32 q7,d29,d0
+	add r2,sp,#656
+	vst1.8 {d10-d11},[r2,: 128]
+	vmull.s32 q5,d18,d3
+	vmlal.s32 q5,d19,d2
+	vmlal.s32 q5,d22,d1
+	vmlal.s32 q5,d23,d0
+	vmlal.s32 q5,d12,d8
+	add r2,sp,#672
+	vst1.8 {d16-d17},[r2,: 128]
+	vmull.s32 q4,d18,d8
+	vmlal.s32 q4,d26,d2
+	vmlal.s32 q4,d19,d7
+	vmlal.s32 q4,d27,d1
+	vmlal.s32 q4,d22,d6
+	vmlal.s32 q4,d28,d0
+	vmull.s32 q8,d18,d7
+	vmlal.s32 q8,d26,d1
+	vmlal.s32 q8,d19,d6
+	vmlal.s32 q8,d27,d0
+	add r2,sp,#576
+	vld1.8 {d20-d21},[r2,: 128]
+	vmlal.s32 q7,d24,d21
+	vmlal.s32 q7,d25,d20
+	vmlal.s32 q4,d23,d21
+	vmlal.s32 q4,d29,d20
+	vmlal.s32 q8,d22,d21
+	vmlal.s32 q8,d28,d20
+	vmlal.s32 q5,d24,d20
+	add r2,sp,#576
+	vst1.8 {d14-d15},[r2,: 128]
+	vmull.s32 q7,d18,d6
+	vmlal.s32 q7,d26,d0
+	add r2,sp,#656
+	vld1.8 {d30-d31},[r2,: 128]
+	vmlal.s32 q2,d30,d21
+	vmlal.s32 q7,d19,d21
+	vmlal.s32 q7,d27,d20
+	add r2,sp,#624
+	vld1.8 {d26-d27},[r2,: 128]
+	vmlal.s32 q4,d25,d27
+	vmlal.s32 q8,d29,d27
+	vmlal.s32 q8,d25,d26
+	vmlal.s32 q7,d28,d27
+	vmlal.s32 q7,d29,d26
+	add r2,sp,#608
+	vld1.8 {d28-d29},[r2,: 128]
+	vmlal.s32 q4,d24,d29
+	vmlal.s32 q8,d23,d29
+	vmlal.s32 q8,d24,d28
+	vmlal.s32 q7,d22,d29
+	vmlal.s32 q7,d23,d28
+	add r2,sp,#608
+	vst1.8 {d8-d9},[r2,: 128]
+	add r2,sp,#560
+	vld1.8 {d8-d9},[r2,: 128]
+	vmlal.s32 q7,d24,d9
+	vmlal.s32 q7,d25,d31
+	vmull.s32 q1,d18,d2
+	vmlal.s32 q1,d19,d1
+	vmlal.s32 q1,d22,d0
+	vmlal.s32 q1,d24,d27
+	vmlal.s32 q1,d23,d20
+	vmlal.s32 q1,d12,d7
+	vmlal.s32 q1,d13,d6
+	vmull.s32 q6,d18,d1
+	vmlal.s32 q6,d19,d0
+	vmlal.s32 q6,d23,d27
+	vmlal.s32 q6,d22,d20
+	vmlal.s32 q6,d24,d26
+	vmull.s32 q0,d18,d0
+	vmlal.s32 q0,d22,d27
+	vmlal.s32 q0,d23,d26
+	vmlal.s32 q0,d24,d31
+	vmlal.s32 q0,d19,d20
+	add r2,sp,#640
+	vld1.8 {d18-d19},[r2,: 128]
+	vmlal.s32 q2,d18,d7
+	vmlal.s32 q2,d19,d6
+	vmlal.s32 q5,d18,d6
+	vmlal.s32 q5,d19,d21
+	vmlal.s32 q1,d18,d21
+	vmlal.s32 q1,d19,d29
+	vmlal.s32 q0,d18,d28
+	vmlal.s32 q0,d19,d9
+	vmlal.s32 q6,d18,d29
+	vmlal.s32 q6,d19,d28
+	add r2,sp,#592
+	vld1.8 {d18-d19},[r2,: 128]
+	add r2,sp,#512
+	vld1.8 {d22-d23},[r2,: 128]
+	vmlal.s32 q5,d19,d7
+	vmlal.s32 q0,d18,d21
+	vmlal.s32 q0,d19,d29
+	vmlal.s32 q6,d18,d6
+	add r2,sp,#528
+	vld1.8 {d6-d7},[r2,: 128]
+	vmlal.s32 q6,d19,d21
+	add r2,sp,#576
+	vld1.8 {d18-d19},[r2,: 128]
+	vmlal.s32 q0,d30,d8
+	add r2,sp,#672
+	vld1.8 {d20-d21},[r2,: 128]
+	vmlal.s32 q5,d30,d29
+	add r2,sp,#608
+	vld1.8 {d24-d25},[r2,: 128]
+	vmlal.s32 q1,d30,d28
+	vadd.i64 q13,q0,q11
+	vadd.i64 q14,q5,q11
+	vmlal.s32 q6,d30,d9
+	vshr.s64 q4,q13,#26
+	vshr.s64 q13,q14,#26
+	vadd.i64 q7,q7,q4
+	vshl.i64 q4,q4,#26
+	vadd.i64 q14,q7,q3
+	vadd.i64 q9,q9,q13
+	vshl.i64 q13,q13,#26
+	vadd.i64 q15,q9,q3
+	vsub.i64 q0,q0,q4
+	vshr.s64 q4,q14,#25
+	vsub.i64 q5,q5,q13
+	vshr.s64 q13,q15,#25
+	vadd.i64 q6,q6,q4
+	vshl.i64 q4,q4,#25
+	vadd.i64 q14,q6,q11
+	vadd.i64 q2,q2,q13
+	vsub.i64 q4,q7,q4
+	vshr.s64 q7,q14,#26
+	vshl.i64 q13,q13,#25
+	vadd.i64 q14,q2,q11
+	vadd.i64 q8,q8,q7
+	vshl.i64 q7,q7,#26
+	vadd.i64 q15,q8,q3
+	vsub.i64 q9,q9,q13
+	vshr.s64 q13,q14,#26
+	vsub.i64 q6,q6,q7
+	vshr.s64 q7,q15,#25
+	vadd.i64 q10,q10,q13
+	vshl.i64 q13,q13,#26
+	vadd.i64 q14,q10,q3
+	vadd.i64 q1,q1,q7
+	add r2,r3,#144
+	vshl.i64 q7,q7,#25
+	add r4,r3,#96
+	vadd.i64 q15,q1,q11
+	add r2,r2,#8
+	vsub.i64 q2,q2,q13
+	add r4,r4,#8
+	vshr.s64 q13,q14,#25
+	vsub.i64 q7,q8,q7
+	vshr.s64 q8,q15,#26
+	vadd.i64 q14,q13,q13
+	vadd.i64 q12,q12,q8
+	vtrn.32 d12,d14
+	vshl.i64 q8,q8,#26
+	vtrn.32 d13,d15
+	vadd.i64 q3,q12,q3
+	vadd.i64 q0,q0,q14
+	vst1.8 d12,[r2,: 64]!
+	vshl.i64 q7,q13,#4
+	vst1.8 d13,[r4,: 64]!
+	vsub.i64 q1,q1,q8
+	vshr.s64 q3,q3,#25
+	vadd.i64 q0,q0,q7
+	vadd.i64 q5,q5,q3
+	vshl.i64 q3,q3,#25
+	vadd.i64 q6,q5,q11
+	vadd.i64 q0,q0,q13
+	vshl.i64 q7,q13,#25
+	vadd.i64 q8,q0,q11
+	vsub.i64 q3,q12,q3
+	vshr.s64 q6,q6,#26
+	vsub.i64 q7,q10,q7
+	vtrn.32 d2,d6
+	vshr.s64 q8,q8,#26
+	vtrn.32 d3,d7
+	vadd.i64 q3,q9,q6
+	vst1.8 d2,[r2,: 64]
+	vshl.i64 q6,q6,#26
+	vst1.8 d3,[r4,: 64]
+	vadd.i64 q1,q4,q8
+	vtrn.32 d4,d14
+	vshl.i64 q4,q8,#26
+	vtrn.32 d5,d15
+	vsub.i64 q5,q5,q6
+	add r2,r2,#16
+	vsub.i64 q0,q0,q4
+	vst1.8 d4,[r2,: 64]
+	add r4,r4,#16
+	vst1.8 d5,[r4,: 64]
+	vtrn.32 d10,d6
+	vtrn.32 d11,d7
+	sub r2,r2,#8
+	sub r4,r4,#8
+	vtrn.32 d0,d2
+	vtrn.32 d1,d3
+	vst1.8 d10,[r2,: 64]
+	vst1.8 d11,[r4,: 64]
+	sub r2,r2,#24
+	sub r4,r4,#24
+	vst1.8 d0,[r2,: 64]
+	vst1.8 d1,[r4,: 64]
+	add r2,r3,#288
+	add r4,r3,#336
+	vld1.8 {d0-d1},[r2,: 128]!
+	vld1.8 {d2-d3},[r4,: 128]!
+	vsub.i32 q0,q0,q1
+	vld1.8 {d2-d3},[r2,: 128]!
+	vld1.8 {d4-d5},[r4,: 128]!
+	vsub.i32 q1,q1,q2
+	add r5,r3,#240
+	vld1.8 {d4},[r2,: 64]
+	vld1.8 {d6},[r4,: 64]
+	vsub.i32 q2,q2,q3
+	vst1.8 {d0-d1},[r5,: 128]!
+	vst1.8 {d2-d3},[r5,: 128]!
+	vst1.8 d4,[r5,: 64]
+	add r2,r3,#144
+	add r4,r3,#96
+	add r5,r3,#144
+	add r6,r3,#192
+	vld1.8 {d0-d1},[r2,: 128]!
+	vld1.8 {d2-d3},[r4,: 128]!
+	vsub.i32 q2,q0,q1
+	vadd.i32 q0,q0,q1
+	vld1.8 {d2-d3},[r2,: 128]!
+	vld1.8 {d6-d7},[r4,: 128]!
+	vsub.i32 q4,q1,q3
+	vadd.i32 q1,q1,q3
+	vld1.8 {d6},[r2,: 64]
+	vld1.8 {d10},[r4,: 64]
+	vsub.i32 q6,q3,q5
+	vadd.i32 q3,q3,q5
+	vst1.8 {d4-d5},[r5,: 128]!
+	vst1.8 {d0-d1},[r6,: 128]!
+	vst1.8 {d8-d9},[r5,: 128]!
+	vst1.8 {d2-d3},[r6,: 128]!
+	vst1.8 d12,[r5,: 64]
+	vst1.8 d6,[r6,: 64]
+	add r2,r3,#0
+	add r4,r3,#240
+	vld1.8 {d0-d1},[r4,: 128]!
+	vld1.8 {d2-d3},[r4,: 128]!
+	vld1.8 {d4},[r4,: 64]
+	add r4,r3,#336
+	vld1.8 {d6-d7},[r4,: 128]!
+	vtrn.32 q0,q3
+	vld1.8 {d8-d9},[r4,: 128]!
+	vshl.i32 q5,q0,#4
+	vtrn.32 q1,q4
+	vshl.i32 q6,q3,#4
+	vadd.i32 q5,q5,q0
+	vadd.i32 q6,q6,q3
+	vshl.i32 q7,q1,#4
+	vld1.8 {d5},[r4,: 64]
+	vshl.i32 q8,q4,#4
+	vtrn.32 d4,d5
+	vadd.i32 q7,q7,q1
+	vadd.i32 q8,q8,q4
+	vld1.8 {d18-d19},[r2,: 128]!
+	vshl.i32 q10,q2,#4
+	vld1.8 {d22-d23},[r2,: 128]!
+	vadd.i32 q10,q10,q2
+	vld1.8 {d24},[r2,: 64]
+	vadd.i32 q5,q5,q0
+	add r2,r3,#288
+	vld1.8 {d26-d27},[r2,: 128]!
+	vadd.i32 q6,q6,q3
+	vld1.8 {d28-d29},[r2,: 128]!
+	vadd.i32 q8,q8,q4
+	vld1.8 {d25},[r2,: 64]
+	vadd.i32 q10,q10,q2
+	vtrn.32 q9,q13
+	vadd.i32 q7,q7,q1
+	vadd.i32 q5,q5,q0
+	vtrn.32 q11,q14
+	vadd.i32 q6,q6,q3
+	add r2,sp,#560
+	vadd.i32 q10,q10,q2
+	vtrn.32 d24,d25
+	vst1.8 {d12-d13},[r2,: 128]
+	vshl.i32 q6,q13,#1
+	add r2,sp,#576
+	vst1.8 {d20-d21},[r2,: 128]
+	vshl.i32 q10,q14,#1
+	add r2,sp,#592
+	vst1.8 {d12-d13},[r2,: 128]
+	vshl.i32 q15,q12,#1
+	vadd.i32 q8,q8,q4
+	vext.32 d10,d31,d30,#0
+	vadd.i32 q7,q7,q1
+	add r2,sp,#608
+	vst1.8 {d16-d17},[r2,: 128]
+	vmull.s32 q8,d18,d5
+	vmlal.s32 q8,d26,d4
+	vmlal.s32 q8,d19,d9
+	vmlal.s32 q8,d27,d3
+	vmlal.s32 q8,d22,d8
+	vmlal.s32 q8,d28,d2
+	vmlal.s32 q8,d23,d7
+	vmlal.s32 q8,d29,d1
+	vmlal.s32 q8,d24,d6
+	vmlal.s32 q8,d25,d0
+	add r2,sp,#624
+	vst1.8 {d14-d15},[r2,: 128]
+	vmull.s32 q2,d18,d4
+	vmlal.s32 q2,d12,d9
+	vmlal.s32 q2,d13,d8
+	vmlal.s32 q2,d19,d3
+	vmlal.s32 q2,d22,d2
+	vmlal.s32 q2,d23,d1
+	vmlal.s32 q2,d24,d0
+	add r2,sp,#640
+	vst1.8 {d20-d21},[r2,: 128]
+	vmull.s32 q7,d18,d9
+	vmlal.s32 q7,d26,d3
+	vmlal.s32 q7,d19,d8
+	vmlal.s32 q7,d27,d2
+	vmlal.s32 q7,d22,d7
+	vmlal.s32 q7,d28,d1
+	vmlal.s32 q7,d23,d6
+	vmlal.s32 q7,d29,d0
+	add r2,sp,#656
+	vst1.8 {d10-d11},[r2,: 128]
+	vmull.s32 q5,d18,d3
+	vmlal.s32 q5,d19,d2
+	vmlal.s32 q5,d22,d1
+	vmlal.s32 q5,d23,d0
+	vmlal.s32 q5,d12,d8
+	add r2,sp,#672
+	vst1.8 {d16-d17},[r2,: 128]
+	vmull.s32 q4,d18,d8
+	vmlal.s32 q4,d26,d2
+	vmlal.s32 q4,d19,d7
+	vmlal.s32 q4,d27,d1
+	vmlal.s32 q4,d22,d6
+	vmlal.s32 q4,d28,d0
+	vmull.s32 q8,d18,d7
+	vmlal.s32 q8,d26,d1
+	vmlal.s32 q8,d19,d6
+	vmlal.s32 q8,d27,d0
+	add r2,sp,#576
+	vld1.8 {d20-d21},[r2,: 128]
+	vmlal.s32 q7,d24,d21
+	vmlal.s32 q7,d25,d20
+	vmlal.s32 q4,d23,d21
+	vmlal.s32 q4,d29,d20
+	vmlal.s32 q8,d22,d21
+	vmlal.s32 q8,d28,d20
+	vmlal.s32 q5,d24,d20
+	add r2,sp,#576
+	vst1.8 {d14-d15},[r2,: 128]
+	vmull.s32 q7,d18,d6
+	vmlal.s32 q7,d26,d0
+	add r2,sp,#656
+	vld1.8 {d30-d31},[r2,: 128]
+	vmlal.s32 q2,d30,d21
+	vmlal.s32 q7,d19,d21
+	vmlal.s32 q7,d27,d20
+	add r2,sp,#624
+	vld1.8 {d26-d27},[r2,: 128]
+	vmlal.s32 q4,d25,d27
+	vmlal.s32 q8,d29,d27
+	vmlal.s32 q8,d25,d26
+	vmlal.s32 q7,d28,d27
+	vmlal.s32 q7,d29,d26
+	add r2,sp,#608
+	vld1.8 {d28-d29},[r2,: 128]
+	vmlal.s32 q4,d24,d29
+	vmlal.s32 q8,d23,d29
+	vmlal.s32 q8,d24,d28
+	vmlal.s32 q7,d22,d29
+	vmlal.s32 q7,d23,d28
+	add r2,sp,#608
+	vst1.8 {d8-d9},[r2,: 128]
+	add r2,sp,#560
+	vld1.8 {d8-d9},[r2,: 128]
+	vmlal.s32 q7,d24,d9
+	vmlal.s32 q7,d25,d31
+	vmull.s32 q1,d18,d2
+	vmlal.s32 q1,d19,d1
+	vmlal.s32 q1,d22,d0
+	vmlal.s32 q1,d24,d27
+	vmlal.s32 q1,d23,d20
+	vmlal.s32 q1,d12,d7
+	vmlal.s32 q1,d13,d6
+	vmull.s32 q6,d18,d1
+	vmlal.s32 q6,d19,d0
+	vmlal.s32 q6,d23,d27
+	vmlal.s32 q6,d22,d20
+	vmlal.s32 q6,d24,d26
+	vmull.s32 q0,d18,d0
+	vmlal.s32 q0,d22,d27
+	vmlal.s32 q0,d23,d26
+	vmlal.s32 q0,d24,d31
+	vmlal.s32 q0,d19,d20
+	add r2,sp,#640
+	vld1.8 {d18-d19},[r2,: 128]
+	vmlal.s32 q2,d18,d7
+	vmlal.s32 q2,d19,d6
+	vmlal.s32 q5,d18,d6
+	vmlal.s32 q5,d19,d21
+	vmlal.s32 q1,d18,d21
+	vmlal.s32 q1,d19,d29
+	vmlal.s32 q0,d18,d28
+	vmlal.s32 q0,d19,d9
+	vmlal.s32 q6,d18,d29
+	vmlal.s32 q6,d19,d28
+	add r2,sp,#592
+	vld1.8 {d18-d19},[r2,: 128]
+	add r2,sp,#512
+	vld1.8 {d22-d23},[r2,: 128]
+	vmlal.s32 q5,d19,d7
+	vmlal.s32 q0,d18,d21
+	vmlal.s32 q0,d19,d29
+	vmlal.s32 q6,d18,d6
+	add r2,sp,#528
+	vld1.8 {d6-d7},[r2,: 128]
+	vmlal.s32 q6,d19,d21
+	add r2,sp,#576
+	vld1.8 {d18-d19},[r2,: 128]
+	vmlal.s32 q0,d30,d8
+	add r2,sp,#672
+	vld1.8 {d20-d21},[r2,: 128]
+	vmlal.s32 q5,d30,d29
+	add r2,sp,#608
+	vld1.8 {d24-d25},[r2,: 128]
+	vmlal.s32 q1,d30,d28
+	vadd.i64 q13,q0,q11
+	vadd.i64 q14,q5,q11
+	vmlal.s32 q6,d30,d9
+	vshr.s64 q4,q13,#26
+	vshr.s64 q13,q14,#26
+	vadd.i64 q7,q7,q4
+	vshl.i64 q4,q4,#26
+	vadd.i64 q14,q7,q3
+	vadd.i64 q9,q9,q13
+	vshl.i64 q13,q13,#26
+	vadd.i64 q15,q9,q3
+	vsub.i64 q0,q0,q4
+	vshr.s64 q4,q14,#25
+	vsub.i64 q5,q5,q13
+	vshr.s64 q13,q15,#25
+	vadd.i64 q6,q6,q4
+	vshl.i64 q4,q4,#25
+	vadd.i64 q14,q6,q11
+	vadd.i64 q2,q2,q13
+	vsub.i64 q4,q7,q4
+	vshr.s64 q7,q14,#26
+	vshl.i64 q13,q13,#25
+	vadd.i64 q14,q2,q11
+	vadd.i64 q8,q8,q7
+	vshl.i64 q7,q7,#26
+	vadd.i64 q15,q8,q3
+	vsub.i64 q9,q9,q13
+	vshr.s64 q13,q14,#26
+	vsub.i64 q6,q6,q7
+	vshr.s64 q7,q15,#25
+	vadd.i64 q10,q10,q13
+	vshl.i64 q13,q13,#26
+	vadd.i64 q14,q10,q3
+	vadd.i64 q1,q1,q7
+	add r2,r3,#288
+	vshl.i64 q7,q7,#25
+	add r4,r3,#96
+	vadd.i64 q15,q1,q11
+	add r2,r2,#8
+	vsub.i64 q2,q2,q13
+	add r4,r4,#8
+	vshr.s64 q13,q14,#25
+	vsub.i64 q7,q8,q7
+	vshr.s64 q8,q15,#26
+	vadd.i64 q14,q13,q13
+	vadd.i64 q12,q12,q8
+	vtrn.32 d12,d14
+	vshl.i64 q8,q8,#26
+	vtrn.32 d13,d15
+	vadd.i64 q3,q12,q3
+	vadd.i64 q0,q0,q14
+	vst1.8 d12,[r2,: 64]!
+	vshl.i64 q7,q13,#4
+	vst1.8 d13,[r4,: 64]!
+	vsub.i64 q1,q1,q8
+	vshr.s64 q3,q3,#25
+	vadd.i64 q0,q0,q7
+	vadd.i64 q5,q5,q3
+	vshl.i64 q3,q3,#25
+	vadd.i64 q6,q5,q11
+	vadd.i64 q0,q0,q13
+	vshl.i64 q7,q13,#25
+	vadd.i64 q8,q0,q11
+	vsub.i64 q3,q12,q3
+	vshr.s64 q6,q6,#26
+	vsub.i64 q7,q10,q7
+	vtrn.32 d2,d6
+	vshr.s64 q8,q8,#26
+	vtrn.32 d3,d7
+	vadd.i64 q3,q9,q6
+	vst1.8 d2,[r2,: 64]
+	vshl.i64 q6,q6,#26
+	vst1.8 d3,[r4,: 64]
+	vadd.i64 q1,q4,q8
+	vtrn.32 d4,d14
+	vshl.i64 q4,q8,#26
+	vtrn.32 d5,d15
+	vsub.i64 q5,q5,q6
+	add r2,r2,#16
+	vsub.i64 q0,q0,q4
+	vst1.8 d4,[r2,: 64]
+	add r4,r4,#16
+	vst1.8 d5,[r4,: 64]
+	vtrn.32 d10,d6
+	vtrn.32 d11,d7
+	sub r2,r2,#8
+	sub r4,r4,#8
+	vtrn.32 d0,d2
+	vtrn.32 d1,d3
+	vst1.8 d10,[r2,: 64]
+	vst1.8 d11,[r4,: 64]
+	sub r2,r2,#24
+	sub r4,r4,#24
+	vst1.8 d0,[r2,: 64]
+	vst1.8 d1,[r4,: 64]
+	add r2,sp,#544
+	add r4,r3,#144
+	add r5,r3,#192
+	vld1.8 {d0-d1},[r2,: 128]
+	vld1.8 {d2-d3},[r4,: 128]!
+	vld1.8 {d4-d5},[r5,: 128]!
+	vzip.i32 q1,q2
+	vld1.8 {d6-d7},[r4,: 128]!
+	vld1.8 {d8-d9},[r5,: 128]!
+	vshl.i32 q5,q1,#1
+	vzip.i32 q3,q4
+	vshl.i32 q6,q2,#1
+	vld1.8 {d14},[r4,: 64]
+	vshl.i32 q8,q3,#1
+	vld1.8 {d15},[r5,: 64]
+	vshl.i32 q9,q4,#1
+	vmul.i32 d21,d7,d1
+	vtrn.32 d14,d15
+	vmul.i32 q11,q4,q0
+	vmul.i32 q0,q7,q0
+	vmull.s32 q12,d2,d2
+	vmlal.s32 q12,d11,d1
+	vmlal.s32 q12,d12,d0
+	vmlal.s32 q12,d13,d23
+	vmlal.s32 q12,d16,d22
+	vmlal.s32 q12,d7,d21
+	vmull.s32 q10,d2,d11
+	vmlal.s32 q10,d4,d1
+	vmlal.s32 q10,d13,d0
+	vmlal.s32 q10,d6,d23
+	vmlal.s32 q10,d17,d22
+	vmull.s32 q13,d10,d4
+	vmlal.s32 q13,d11,d3
+	vmlal.s32 q13,d13,d1
+	vmlal.s32 q13,d16,d0
+	vmlal.s32 q13,d17,d23
+	vmlal.s32 q13,d8,d22
+	vmull.s32 q1,d10,d5
+	vmlal.s32 q1,d11,d4
+	vmlal.s32 q1,d6,d1
+	vmlal.s32 q1,d17,d0
+	vmlal.s32 q1,d8,d23
+	vmull.s32 q14,d10,d6
+	vmlal.s32 q14,d11,d13
+	vmlal.s32 q14,d4,d4
+	vmlal.s32 q14,d17,d1
+	vmlal.s32 q14,d18,d0
+	vmlal.s32 q14,d9,d23
+	vmull.s32 q11,d10,d7
+	vmlal.s32 q11,d11,d6
+	vmlal.s32 q11,d12,d5
+	vmlal.s32 q11,d8,d1
+	vmlal.s32 q11,d19,d0
+	vmull.s32 q15,d10,d8
+	vmlal.s32 q15,d11,d17
+	vmlal.s32 q15,d12,d6
+	vmlal.s32 q15,d13,d5
+	vmlal.s32 q15,d19,d1
+	vmlal.s32 q15,d14,d0
+	vmull.s32 q2,d10,d9
+	vmlal.s32 q2,d11,d8
+	vmlal.s32 q2,d12,d7
+	vmlal.s32 q2,d13,d6
+	vmlal.s32 q2,d14,d1
+	vmull.s32 q0,d15,d1
+	vmlal.s32 q0,d10,d14
+	vmlal.s32 q0,d11,d19
+	vmlal.s32 q0,d12,d8
+	vmlal.s32 q0,d13,d17
+	vmlal.s32 q0,d6,d6
+	add r2,sp,#512
+	vld1.8 {d18-d19},[r2,: 128]
+	vmull.s32 q3,d16,d7
+	vmlal.s32 q3,d10,d15
+	vmlal.s32 q3,d11,d14
+	vmlal.s32 q3,d12,d9
+	vmlal.s32 q3,d13,d8
+	add r2,sp,#528
+	vld1.8 {d8-d9},[r2,: 128]
+	vadd.i64 q5,q12,q9
+	vadd.i64 q6,q15,q9
+	vshr.s64 q5,q5,#26
+	vshr.s64 q6,q6,#26
+	vadd.i64 q7,q10,q5
+	vshl.i64 q5,q5,#26
+	vadd.i64 q8,q7,q4
+	vadd.i64 q2,q2,q6
+	vshl.i64 q6,q6,#26
+	vadd.i64 q10,q2,q4
+	vsub.i64 q5,q12,q5
+	vshr.s64 q8,q8,#25
+	vsub.i64 q6,q15,q6
+	vshr.s64 q10,q10,#25
+	vadd.i64 q12,q13,q8
+	vshl.i64 q8,q8,#25
+	vadd.i64 q13,q12,q9
+	vadd.i64 q0,q0,q10
+	vsub.i64 q7,q7,q8
+	vshr.s64 q8,q13,#26
+	vshl.i64 q10,q10,#25
+	vadd.i64 q13,q0,q9
+	vadd.i64 q1,q1,q8
+	vshl.i64 q8,q8,#26
+	vadd.i64 q15,q1,q4
+	vsub.i64 q2,q2,q10
+	vshr.s64 q10,q13,#26
+	vsub.i64 q8,q12,q8
+	vshr.s64 q12,q15,#25
+	vadd.i64 q3,q3,q10
+	vshl.i64 q10,q10,#26
+	vadd.i64 q13,q3,q4
+	vadd.i64 q14,q14,q12
+	add r2,r3,#144
+	vshl.i64 q12,q12,#25
+	add r4,r3,#192
+	vadd.i64 q15,q14,q9
+	add r2,r2,#8
+	vsub.i64 q0,q0,q10
+	add r4,r4,#8
+	vshr.s64 q10,q13,#25
+	vsub.i64 q1,q1,q12
+	vshr.s64 q12,q15,#26
+	vadd.i64 q13,q10,q10
+	vadd.i64 q11,q11,q12
+	vtrn.32 d16,d2
+	vshl.i64 q12,q12,#26
+	vtrn.32 d17,d3
+	vadd.i64 q1,q11,q4
+	vadd.i64 q4,q5,q13
+	vst1.8 d16,[r2,: 64]!
+	vshl.i64 q5,q10,#4
+	vst1.8 d17,[r4,: 64]!
+	vsub.i64 q8,q14,q12
+	vshr.s64 q1,q1,#25
+	vadd.i64 q4,q4,q5
+	vadd.i64 q5,q6,q1
+	vshl.i64 q1,q1,#25
+	vadd.i64 q6,q5,q9
+	vadd.i64 q4,q4,q10
+	vshl.i64 q10,q10,#25
+	vadd.i64 q9,q4,q9
+	vsub.i64 q1,q11,q1
+	vshr.s64 q6,q6,#26
+	vsub.i64 q3,q3,q10
+	vtrn.32 d16,d2
+	vshr.s64 q9,q9,#26
+	vtrn.32 d17,d3
+	vadd.i64 q1,q2,q6
+	vst1.8 d16,[r2,: 64]
+	vshl.i64 q2,q6,#26
+	vst1.8 d17,[r4,: 64]
+	vadd.i64 q6,q7,q9
+	vtrn.32 d0,d6
+	vshl.i64 q7,q9,#26
+	vtrn.32 d1,d7
+	vsub.i64 q2,q5,q2
+	add r2,r2,#16
+	vsub.i64 q3,q4,q7
+	vst1.8 d0,[r2,: 64]
+	add r4,r4,#16
+	vst1.8 d1,[r4,: 64]
+	vtrn.32 d4,d2
+	vtrn.32 d5,d3
+	sub r2,r2,#8
+	sub r4,r4,#8
+	vtrn.32 d6,d12
+	vtrn.32 d7,d13
+	vst1.8 d4,[r2,: 64]
+	vst1.8 d5,[r4,: 64]
+	sub r2,r2,#24
+	sub r4,r4,#24
+	vst1.8 d6,[r2,: 64]
+	vst1.8 d7,[r4,: 64]
+	add r2,r3,#336
+	add r4,r3,#288
+	vld1.8 {d0-d1},[r2,: 128]!
+	vld1.8 {d2-d3},[r4,: 128]!
+	vadd.i32 q0,q0,q1
+	vld1.8 {d2-d3},[r2,: 128]!
+	vld1.8 {d4-d5},[r4,: 128]!
+	vadd.i32 q1,q1,q2
+	add r5,r3,#288
+	vld1.8 {d4},[r2,: 64]
+	vld1.8 {d6},[r4,: 64]
+	vadd.i32 q2,q2,q3
+	vst1.8 {d0-d1},[r5,: 128]!
+	vst1.8 {d2-d3},[r5,: 128]!
+	vst1.8 d4,[r5,: 64]
+	add r2,r3,#48
+	add r4,r3,#144
+	vld1.8 {d0-d1},[r4,: 128]!
+	vld1.8 {d2-d3},[r4,: 128]!
+	vld1.8 {d4},[r4,: 64]
+	add r4,r3,#288
+	vld1.8 {d6-d7},[r4,: 128]!
+	vtrn.32 q0,q3
+	vld1.8 {d8-d9},[r4,: 128]!
+	vshl.i32 q5,q0,#4
+	vtrn.32 q1,q4
+	vshl.i32 q6,q3,#4
+	vadd.i32 q5,q5,q0
+	vadd.i32 q6,q6,q3
+	vshl.i32 q7,q1,#4
+	vld1.8 {d5},[r4,: 64]
+	vshl.i32 q8,q4,#4
+	vtrn.32 d4,d5
+	vadd.i32 q7,q7,q1
+	vadd.i32 q8,q8,q4
+	vld1.8 {d18-d19},[r2,: 128]!
+	vshl.i32 q10,q2,#4
+	vld1.8 {d22-d23},[r2,: 128]!
+	vadd.i32 q10,q10,q2
+	vld1.8 {d24},[r2,: 64]
+	vadd.i32 q5,q5,q0
+	add r2,r3,#240
+	vld1.8 {d26-d27},[r2,: 128]!
+	vadd.i32 q6,q6,q3
+	vld1.8 {d28-d29},[r2,: 128]!
+	vadd.i32 q8,q8,q4
+	vld1.8 {d25},[r2,: 64]
+	vadd.i32 q10,q10,q2
+	vtrn.32 q9,q13
+	vadd.i32 q7,q7,q1
+	vadd.i32 q5,q5,q0
+	vtrn.32 q11,q14
+	vadd.i32 q6,q6,q3
+	add r2,sp,#560
+	vadd.i32 q10,q10,q2
+	vtrn.32 d24,d25
+	vst1.8 {d12-d13},[r2,: 128]
+	vshl.i32 q6,q13,#1
+	add r2,sp,#576
+	vst1.8 {d20-d21},[r2,: 128]
+	vshl.i32 q10,q14,#1
+	add r2,sp,#592
+	vst1.8 {d12-d13},[r2,: 128]
+	vshl.i32 q15,q12,#1
+	vadd.i32 q8,q8,q4
+	vext.32 d10,d31,d30,#0
+	vadd.i32 q7,q7,q1
+	add r2,sp,#608
+	vst1.8 {d16-d17},[r2,: 128]
+	vmull.s32 q8,d18,d5
+	vmlal.s32 q8,d26,d4
+	vmlal.s32 q8,d19,d9
+	vmlal.s32 q8,d27,d3
+	vmlal.s32 q8,d22,d8
+	vmlal.s32 q8,d28,d2
+	vmlal.s32 q8,d23,d7
+	vmlal.s32 q8,d29,d1
+	vmlal.s32 q8,d24,d6
+	vmlal.s32 q8,d25,d0
+	add r2,sp,#624
+	vst1.8 {d14-d15},[r2,: 128]
+	vmull.s32 q2,d18,d4
+	vmlal.s32 q2,d12,d9
+	vmlal.s32 q2,d13,d8
+	vmlal.s32 q2,d19,d3
+	vmlal.s32 q2,d22,d2
+	vmlal.s32 q2,d23,d1
+	vmlal.s32 q2,d24,d0
+	add r2,sp,#640
+	vst1.8 {d20-d21},[r2,: 128]
+	vmull.s32 q7,d18,d9
+	vmlal.s32 q7,d26,d3
+	vmlal.s32 q7,d19,d8
+	vmlal.s32 q7,d27,d2
+	vmlal.s32 q7,d22,d7
+	vmlal.s32 q7,d28,d1
+	vmlal.s32 q7,d23,d6
+	vmlal.s32 q7,d29,d0
+	add r2,sp,#656
+	vst1.8 {d10-d11},[r2,: 128]
+	vmull.s32 q5,d18,d3
+	vmlal.s32 q5,d19,d2
+	vmlal.s32 q5,d22,d1
+	vmlal.s32 q5,d23,d0
+	vmlal.s32 q5,d12,d8
+	add r2,sp,#672
+	vst1.8 {d16-d17},[r2,: 128]
+	vmull.s32 q4,d18,d8
+	vmlal.s32 q4,d26,d2
+	vmlal.s32 q4,d19,d7
+	vmlal.s32 q4,d27,d1
+	vmlal.s32 q4,d22,d6
+	vmlal.s32 q4,d28,d0
+	vmull.s32 q8,d18,d7
+	vmlal.s32 q8,d26,d1
+	vmlal.s32 q8,d19,d6
+	vmlal.s32 q8,d27,d0
+	add r2,sp,#576
+	vld1.8 {d20-d21},[r2,: 128]
+	vmlal.s32 q7,d24,d21
+	vmlal.s32 q7,d25,d20
+	vmlal.s32 q4,d23,d21
+	vmlal.s32 q4,d29,d20
+	vmlal.s32 q8,d22,d21
+	vmlal.s32 q8,d28,d20
+	vmlal.s32 q5,d24,d20
+	add r2,sp,#576
+	vst1.8 {d14-d15},[r2,: 128]
+	vmull.s32 q7,d18,d6
+	vmlal.s32 q7,d26,d0
+	add r2,sp,#656
+	vld1.8 {d30-d31},[r2,: 128]
+	vmlal.s32 q2,d30,d21
+	vmlal.s32 q7,d19,d21
+	vmlal.s32 q7,d27,d20
+	add r2,sp,#624
+	vld1.8 {d26-d27},[r2,: 128]
+	vmlal.s32 q4,d25,d27
+	vmlal.s32 q8,d29,d27
+	vmlal.s32 q8,d25,d26
+	vmlal.s32 q7,d28,d27
+	vmlal.s32 q7,d29,d26
+	add r2,sp,#608
+	vld1.8 {d28-d29},[r2,: 128]
+	vmlal.s32 q4,d24,d29
+	vmlal.s32 q8,d23,d29
+	vmlal.s32 q8,d24,d28
+	vmlal.s32 q7,d22,d29
+	vmlal.s32 q7,d23,d28
+	add r2,sp,#608
+	vst1.8 {d8-d9},[r2,: 128]
+	add r2,sp,#560
+	vld1.8 {d8-d9},[r2,: 128]
+	vmlal.s32 q7,d24,d9
+	vmlal.s32 q7,d25,d31
+	vmull.s32 q1,d18,d2
+	vmlal.s32 q1,d19,d1
+	vmlal.s32 q1,d22,d0
+	vmlal.s32 q1,d24,d27
+	vmlal.s32 q1,d23,d20
+	vmlal.s32 q1,d12,d7
+	vmlal.s32 q1,d13,d6
+	vmull.s32 q6,d18,d1
+	vmlal.s32 q6,d19,d0
+	vmlal.s32 q6,d23,d27
+	vmlal.s32 q6,d22,d20
+	vmlal.s32 q6,d24,d26
+	vmull.s32 q0,d18,d0
+	vmlal.s32 q0,d22,d27
+	vmlal.s32 q0,d23,d26
+	vmlal.s32 q0,d24,d31
+	vmlal.s32 q0,d19,d20
+	add r2,sp,#640
+	vld1.8 {d18-d19},[r2,: 128]
+	vmlal.s32 q2,d18,d7
+	vmlal.s32 q2,d19,d6
+	vmlal.s32 q5,d18,d6
+	vmlal.s32 q5,d19,d21
+	vmlal.s32 q1,d18,d21
+	vmlal.s32 q1,d19,d29
+	vmlal.s32 q0,d18,d28
+	vmlal.s32 q0,d19,d9
+	vmlal.s32 q6,d18,d29
+	vmlal.s32 q6,d19,d28
+	add r2,sp,#592
+	vld1.8 {d18-d19},[r2,: 128]
+	add r2,sp,#512
+	vld1.8 {d22-d23},[r2,: 128]
+	vmlal.s32 q5,d19,d7
+	vmlal.s32 q0,d18,d21
+	vmlal.s32 q0,d19,d29
+	vmlal.s32 q6,d18,d6
+	add r2,sp,#528
+	vld1.8 {d6-d7},[r2,: 128]
+	vmlal.s32 q6,d19,d21
+	add r2,sp,#576
+	vld1.8 {d18-d19},[r2,: 128]
+	vmlal.s32 q0,d30,d8
+	add r2,sp,#672
+	vld1.8 {d20-d21},[r2,: 128]
+	vmlal.s32 q5,d30,d29
+	add r2,sp,#608
+	vld1.8 {d24-d25},[r2,: 128]
+	vmlal.s32 q1,d30,d28
+	vadd.i64 q13,q0,q11
+	vadd.i64 q14,q5,q11
+	vmlal.s32 q6,d30,d9
+	vshr.s64 q4,q13,#26
+	vshr.s64 q13,q14,#26
+	vadd.i64 q7,q7,q4
+	vshl.i64 q4,q4,#26
+	vadd.i64 q14,q7,q3
+	vadd.i64 q9,q9,q13
+	vshl.i64 q13,q13,#26
+	vadd.i64 q15,q9,q3
+	vsub.i64 q0,q0,q4
+	vshr.s64 q4,q14,#25
+	vsub.i64 q5,q5,q13
+	vshr.s64 q13,q15,#25
+	vadd.i64 q6,q6,q4
+	vshl.i64 q4,q4,#25
+	vadd.i64 q14,q6,q11
+	vadd.i64 q2,q2,q13
+	vsub.i64 q4,q7,q4
+	vshr.s64 q7,q14,#26
+	vshl.i64 q13,q13,#25
+	vadd.i64 q14,q2,q11
+	vadd.i64 q8,q8,q7
+	vshl.i64 q7,q7,#26
+	vadd.i64 q15,q8,q3
+	vsub.i64 q9,q9,q13
+	vshr.s64 q13,q14,#26
+	vsub.i64 q6,q6,q7
+	vshr.s64 q7,q15,#25
+	vadd.i64 q10,q10,q13
+	vshl.i64 q13,q13,#26
+	vadd.i64 q14,q10,q3
+	vadd.i64 q1,q1,q7
+	add r2,r3,#240
+	vshl.i64 q7,q7,#25
+	add r4,r3,#144
+	vadd.i64 q15,q1,q11
+	add r2,r2,#8
+	vsub.i64 q2,q2,q13
+	add r4,r4,#8
+	vshr.s64 q13,q14,#25
+	vsub.i64 q7,q8,q7
+	vshr.s64 q8,q15,#26
+	vadd.i64 q14,q13,q13
+	vadd.i64 q12,q12,q8
+	vtrn.32 d12,d14
+	vshl.i64 q8,q8,#26
+	vtrn.32 d13,d15
+	vadd.i64 q3,q12,q3
+	vadd.i64 q0,q0,q14
+	vst1.8 d12,[r2,: 64]!
+	vshl.i64 q7,q13,#4
+	vst1.8 d13,[r4,: 64]!
+	vsub.i64 q1,q1,q8
+	vshr.s64 q3,q3,#25
+	vadd.i64 q0,q0,q7
+	vadd.i64 q5,q5,q3
+	vshl.i64 q3,q3,#25
+	vadd.i64 q6,q5,q11
+	vadd.i64 q0,q0,q13
+	vshl.i64 q7,q13,#25
+	vadd.i64 q8,q0,q11
+	vsub.i64 q3,q12,q3
+	vshr.s64 q6,q6,#26
+	vsub.i64 q7,q10,q7
+	vtrn.32 d2,d6
+	vshr.s64 q8,q8,#26
+	vtrn.32 d3,d7
+	vadd.i64 q3,q9,q6
+	vst1.8 d2,[r2,: 64]
+	vshl.i64 q6,q6,#26
+	vst1.8 d3,[r4,: 64]
+	vadd.i64 q1,q4,q8
+	vtrn.32 d4,d14
+	vshl.i64 q4,q8,#26
+	vtrn.32 d5,d15
+	vsub.i64 q5,q5,q6
+	add r2,r2,#16
+	vsub.i64 q0,q0,q4
+	vst1.8 d4,[r2,: 64]
+	add r4,r4,#16
+	vst1.8 d5,[r4,: 64]
+	vtrn.32 d10,d6
+	vtrn.32 d11,d7
+	sub r2,r2,#8
+	sub r4,r4,#8
+	vtrn.32 d0,d2
+	vtrn.32 d1,d3
+	vst1.8 d10,[r2,: 64]
+	vst1.8 d11,[r4,: 64]
+	sub r2,r2,#24
+	sub r4,r4,#24
+	vst1.8 d0,[r2,: 64]
+	vst1.8 d1,[r4,: 64]
+	ldr r2,[sp,#488]
+	ldr r4,[sp,#492]
+	subs r5,r2,#1
+	bge .Lmainloop
+	add r1,r3,#144
+	add r2,r3,#336
+	vld1.8 {d0-d1},[r1,: 128]!
+	vld1.8 {d2-d3},[r1,: 128]!
+	vld1.8 {d4},[r1,: 64]
+	vst1.8 {d0-d1},[r2,: 128]!
+	vst1.8 {d2-d3},[r2,: 128]!
+	vst1.8 d4,[r2,: 64]
+	ldr r1,=0
+	.Linvertloop:
+	add r2,r3,#144
+	ldr r4,=0
+	ldr r5,=2
+	cmp r1,#1
+	ldreq r5,=1
+	addeq r2,r3,#336
+	addeq r4,r3,#48
+	cmp r1,#2
+	ldreq r5,=1
+	addeq r2,r3,#48
+	cmp r1,#3
+	ldreq r5,=5
+	addeq r4,r3,#336
+	cmp r1,#4
+	ldreq r5,=10
+	cmp r1,#5
+	ldreq r5,=20
+	cmp r1,#6
+	ldreq r5,=10
+	addeq r2,r3,#336
+	addeq r4,r3,#336
+	cmp r1,#7
+	ldreq r5,=50
+	cmp r1,#8
+	ldreq r5,=100
+	cmp r1,#9
+	ldreq r5,=50
+	addeq r2,r3,#336
+	cmp r1,#10
+	ldreq r5,=5
+	addeq r2,r3,#48
+	cmp r1,#11
+	ldreq r5,=0
+	addeq r2,r3,#96
+	add r6,r3,#144
+	add r7,r3,#288
+	vld1.8 {d0-d1},[r6,: 128]!
+	vld1.8 {d2-d3},[r6,: 128]!
+	vld1.8 {d4},[r6,: 64]
+	vst1.8 {d0-d1},[r7,: 128]!
+	vst1.8 {d2-d3},[r7,: 128]!
+	vst1.8 d4,[r7,: 64]
+	cmp r5,#0
+	beq .Lskipsquaringloop
+	.Lsquaringloop:
+	add r6,r3,#288
+	add r7,r3,#288
+	add r8,r3,#288
+	vmov.i32 q0,#19
+	vmov.i32 q1,#0
+	vmov.i32 q2,#1
+	vzip.i32 q1,q2
+	vld1.8 {d4-d5},[r7,: 128]!
+	vld1.8 {d6-d7},[r7,: 128]!
+	vld1.8 {d9},[r7,: 64]
+	vld1.8 {d10-d11},[r6,: 128]!
+	add r7,sp,#416
+	vld1.8 {d12-d13},[r6,: 128]!
+	vmul.i32 q7,q2,q0
+	vld1.8 {d8},[r6,: 64]
+	vext.32 d17,d11,d10,#1
+	vmul.i32 q9,q3,q0
+	vext.32 d16,d10,d8,#1
+	vshl.u32 q10,q5,q1
+	vext.32 d22,d14,d4,#1
+	vext.32 d24,d18,d6,#1
+	vshl.u32 q13,q6,q1
+	vshl.u32 d28,d8,d2
+	vrev64.i32 d22,d22
+	vmul.i32 d1,d9,d1
+	vrev64.i32 d24,d24
+	vext.32 d29,d8,d13,#1
+	vext.32 d0,d1,d9,#1
+	vrev64.i32 d0,d0
+	vext.32 d2,d9,d1,#1
+	vext.32 d23,d15,d5,#1
+	vmull.s32 q4,d20,d4
+	vrev64.i32 d23,d23
+	vmlal.s32 q4,d21,d1
+	vrev64.i32 d2,d2
+	vmlal.s32 q4,d26,d19
+	vext.32 d3,d5,d15,#1
+	vmlal.s32 q4,d27,d18
+	vrev64.i32 d3,d3
+	vmlal.s32 q4,d28,d15
+	vext.32 d14,d12,d11,#1
+	vmull.s32 q5,d16,d23
+	vext.32 d15,d13,d12,#1
+	vmlal.s32 q5,d17,d4
+	vst1.8 d8,[r7,: 64]!
+	vmlal.s32 q5,d14,d1
+	vext.32 d12,d9,d8,#0
+	vmlal.s32 q5,d15,d19
+	vmov.i64 d13,#0
+	vmlal.s32 q5,d29,d18
+	vext.32 d25,d19,d7,#1
+	vmlal.s32 q6,d20,d5
+	vrev64.i32 d25,d25
+	vmlal.s32 q6,d21,d4
+	vst1.8 d11,[r7,: 64]!
+	vmlal.s32 q6,d26,d1
+	vext.32 d9,d10,d10,#0
+	vmlal.s32 q6,d27,d19
+	vmov.i64 d8,#0
+	vmlal.s32 q6,d28,d18
+	vmlal.s32 q4,d16,d24
+	vmlal.s32 q4,d17,d5
+	vmlal.s32 q4,d14,d4
+	vst1.8 d12,[r7,: 64]!
+	vmlal.s32 q4,d15,d1
+	vext.32 d10,d13,d12,#0
+	vmlal.s32 q4,d29,d19
+	vmov.i64 d11,#0
+	vmlal.s32 q5,d20,d6
+	vmlal.s32 q5,d21,d5
+	vmlal.s32 q5,d26,d4
+	vext.32 d13,d8,d8,#0
+	vmlal.s32 q5,d27,d1
+	vmov.i64 d12,#0
+	vmlal.s32 q5,d28,d19
+	vst1.8 d9,[r7,: 64]!
+	vmlal.s32 q6,d16,d25
+	vmlal.s32 q6,d17,d6
+	vst1.8 d10,[r7,: 64]
+	vmlal.s32 q6,d14,d5
+	vext.32 d8,d11,d10,#0
+	vmlal.s32 q6,d15,d4
+	vmov.i64 d9,#0
+	vmlal.s32 q6,d29,d1
+	vmlal.s32 q4,d20,d7
+	vmlal.s32 q4,d21,d6
+	vmlal.s32 q4,d26,d5
+	vext.32 d11,d12,d12,#0
+	vmlal.s32 q4,d27,d4
+	vmov.i64 d10,#0
+	vmlal.s32 q4,d28,d1
+	vmlal.s32 q5,d16,d0
+	sub r6,r7,#32
+	vmlal.s32 q5,d17,d7
+	vmlal.s32 q5,d14,d6
+	vext.32 d30,d9,d8,#0
+	vmlal.s32 q5,d15,d5
+	vld1.8 {d31},[r6,: 64]!
+	vmlal.s32 q5,d29,d4
+	vmlal.s32 q15,d20,d0
+	vext.32 d0,d6,d18,#1
+	vmlal.s32 q15,d21,d25
+	vrev64.i32 d0,d0
+	vmlal.s32 q15,d26,d24
+	vext.32 d1,d7,d19,#1
+	vext.32 d7,d10,d10,#0
+	vmlal.s32 q15,d27,d23
+	vrev64.i32 d1,d1
+	vld1.8 {d6},[r6,: 64]
+	vmlal.s32 q15,d28,d22
+	vmlal.s32 q3,d16,d4
+	add r6,r6,#24
+	vmlal.s32 q3,d17,d2
+	vext.32 d4,d31,d30,#0
+	vmov d17,d11
+	vmlal.s32 q3,d14,d1
+	vext.32 d11,d13,d13,#0
+	vext.32 d13,d30,d30,#0
+	vmlal.s32 q3,d15,d0
+	vext.32 d1,d8,d8,#0
+	vmlal.s32 q3,d29,d3
+	vld1.8 {d5},[r6,: 64]
+	sub r6,r6,#16
+	vext.32 d10,d6,d6,#0
+	vmov.i32 q1,#0xffffffff
+	vshl.i64 q4,q1,#25
+	add r7,sp,#512
+	vld1.8 {d14-d15},[r7,: 128]
+	vadd.i64 q9,q2,q7
+	vshl.i64 q1,q1,#26
+	vshr.s64 q10,q9,#26
+	vld1.8 {d0},[r6,: 64]!
+	vadd.i64 q5,q5,q10
+	vand q9,q9,q1
+	vld1.8 {d16},[r6,: 64]!
+	add r6,sp,#528
+	vld1.8 {d20-d21},[r6,: 128]
+	vadd.i64 q11,q5,q10
+	vsub.i64 q2,q2,q9
+	vshr.s64 q9,q11,#25
+	vext.32 d12,d5,d4,#0
+	vand q11,q11,q4
+	vadd.i64 q0,q0,q9
+	vmov d19,d7
+	vadd.i64 q3,q0,q7
+	vsub.i64 q5,q5,q11
+	vshr.s64 q11,q3,#26
+	vext.32 d18,d11,d10,#0
+	vand q3,q3,q1
+	vadd.i64 q8,q8,q11
+	vadd.i64 q11,q8,q10
+	vsub.i64 q0,q0,q3
+	vshr.s64 q3,q11,#25
+	vand q11,q11,q4
+	vadd.i64 q3,q6,q3
+	vadd.i64 q6,q3,q7
+	vsub.i64 q8,q8,q11
+	vshr.s64 q11,q6,#26
+	vand q6,q6,q1
+	vadd.i64 q9,q9,q11
+	vadd.i64 d25,d19,d21
+	vsub.i64 q3,q3,q6
+	vshr.s64 d23,d25,#25
+	vand q4,q12,q4
+	vadd.i64 d21,d23,d23
+	vshl.i64 d25,d23,#4
+	vadd.i64 d21,d21,d23
+	vadd.i64 d25,d25,d21
+	vadd.i64 d4,d4,d25
+	vzip.i32 q0,q8
+	vadd.i64 d12,d4,d14
+	add r6,r8,#8
+	vst1.8 d0,[r6,: 64]
+	vsub.i64 d19,d19,d9
+	add r6,r6,#16
+	vst1.8 d16,[r6,: 64]
+	vshr.s64 d22,d12,#26
+	vand q0,q6,q1
+	vadd.i64 d10,d10,d22
+	vzip.i32 q3,q9
+	vsub.i64 d4,d4,d0
+	sub r6,r6,#8
+	vst1.8 d6,[r6,: 64]
+	add r6,r6,#16
+	vst1.8 d18,[r6,: 64]
+	vzip.i32 q2,q5
+	sub r6,r6,#32
+	vst1.8 d4,[r6,: 64]
+	subs r5,r5,#1
+	bhi .Lsquaringloop
+	.Lskipsquaringloop:
+	mov r2,r2
+	add r5,r3,#288
+	add r6,r3,#144
+	vmov.i32 q0,#19
+	vmov.i32 q1,#0
+	vmov.i32 q2,#1
+	vzip.i32 q1,q2
+	vld1.8 {d4-d5},[r5,: 128]!
+	vld1.8 {d6-d7},[r5,: 128]!
+	vld1.8 {d9},[r5,: 64]
+	vld1.8 {d10-d11},[r2,: 128]!
+	add r5,sp,#416
+	vld1.8 {d12-d13},[r2,: 128]!
+	vmul.i32 q7,q2,q0
+	vld1.8 {d8},[r2,: 64]
+	vext.32 d17,d11,d10,#1
+	vmul.i32 q9,q3,q0
+	vext.32 d16,d10,d8,#1
+	vshl.u32 q10,q5,q1
+	vext.32 d22,d14,d4,#1
+	vext.32 d24,d18,d6,#1
+	vshl.u32 q13,q6,q1
+	vshl.u32 d28,d8,d2
+	vrev64.i32 d22,d22
+	vmul.i32 d1,d9,d1
+	vrev64.i32 d24,d24
+	vext.32 d29,d8,d13,#1
+	vext.32 d0,d1,d9,#1
+	vrev64.i32 d0,d0
+	vext.32 d2,d9,d1,#1
+	vext.32 d23,d15,d5,#1
+	vmull.s32 q4,d20,d4
+	vrev64.i32 d23,d23
+	vmlal.s32 q4,d21,d1
+	vrev64.i32 d2,d2
+	vmlal.s32 q4,d26,d19
+	vext.32 d3,d5,d15,#1
+	vmlal.s32 q4,d27,d18
+	vrev64.i32 d3,d3
+	vmlal.s32 q4,d28,d15
+	vext.32 d14,d12,d11,#1
+	vmull.s32 q5,d16,d23
+	vext.32 d15,d13,d12,#1
+	vmlal.s32 q5,d17,d4
+	vst1.8 d8,[r5,: 64]!
+	vmlal.s32 q5,d14,d1
+	vext.32 d12,d9,d8,#0
+	vmlal.s32 q5,d15,d19
+	vmov.i64 d13,#0
+	vmlal.s32 q5,d29,d18
+	vext.32 d25,d19,d7,#1
+	vmlal.s32 q6,d20,d5
+	vrev64.i32 d25,d25
+	vmlal.s32 q6,d21,d4
+	vst1.8 d11,[r5,: 64]!
+	vmlal.s32 q6,d26,d1
+	vext.32 d9,d10,d10,#0
+	vmlal.s32 q6,d27,d19
+	vmov.i64 d8,#0
+	vmlal.s32 q6,d28,d18
+	vmlal.s32 q4,d16,d24
+	vmlal.s32 q4,d17,d5
+	vmlal.s32 q4,d14,d4
+	vst1.8 d12,[r5,: 64]!
+	vmlal.s32 q4,d15,d1
+	vext.32 d10,d13,d12,#0
+	vmlal.s32 q4,d29,d19
+	vmov.i64 d11,#0
+	vmlal.s32 q5,d20,d6
+	vmlal.s32 q5,d21,d5
+	vmlal.s32 q5,d26,d4
+	vext.32 d13,d8,d8,#0
+	vmlal.s32 q5,d27,d1
+	vmov.i64 d12,#0
+	vmlal.s32 q5,d28,d19
+	vst1.8 d9,[r5,: 64]!
+	vmlal.s32 q6,d16,d25
+	vmlal.s32 q6,d17,d6
+	vst1.8 d10,[r5,: 64]
+	vmlal.s32 q6,d14,d5
+	vext.32 d8,d11,d10,#0
+	vmlal.s32 q6,d15,d4
+	vmov.i64 d9,#0
+	vmlal.s32 q6,d29,d1
+	vmlal.s32 q4,d20,d7
+	vmlal.s32 q4,d21,d6
+	vmlal.s32 q4,d26,d5
+	vext.32 d11,d12,d12,#0
+	vmlal.s32 q4,d27,d4
+	vmov.i64 d10,#0
+	vmlal.s32 q4,d28,d1
+	vmlal.s32 q5,d16,d0
+	sub r2,r5,#32
+	vmlal.s32 q5,d17,d7
+	vmlal.s32 q5,d14,d6
+	vext.32 d30,d9,d8,#0
+	vmlal.s32 q5,d15,d5
+	vld1.8 {d31},[r2,: 64]!
+	vmlal.s32 q5,d29,d4
+	vmlal.s32 q15,d20,d0
+	vext.32 d0,d6,d18,#1
+	vmlal.s32 q15,d21,d25
+	vrev64.i32 d0,d0
+	vmlal.s32 q15,d26,d24
+	vext.32 d1,d7,d19,#1
+	vext.32 d7,d10,d10,#0
+	vmlal.s32 q15,d27,d23
+	vrev64.i32 d1,d1
+	vld1.8 {d6},[r2,: 64]
+	vmlal.s32 q15,d28,d22
+	vmlal.s32 q3,d16,d4
+	add r2,r2,#24
+	vmlal.s32 q3,d17,d2
+	vext.32 d4,d31,d30,#0
+	vmov d17,d11
+	vmlal.s32 q3,d14,d1
+	vext.32 d11,d13,d13,#0
+	vext.32 d13,d30,d30,#0
+	vmlal.s32 q3,d15,d0
+	vext.32 d1,d8,d8,#0
+	vmlal.s32 q3,d29,d3
+	vld1.8 {d5},[r2,: 64]
+	sub r2,r2,#16
+	vext.32 d10,d6,d6,#0
+	vmov.i32 q1,#0xffffffff
+	vshl.i64 q4,q1,#25
+	add r5,sp,#512
+	vld1.8 {d14-d15},[r5,: 128]
+	vadd.i64 q9,q2,q7
+	vshl.i64 q1,q1,#26
+	vshr.s64 q10,q9,#26
+	vld1.8 {d0},[r2,: 64]!
+	vadd.i64 q5,q5,q10
+	vand q9,q9,q1
+	vld1.8 {d16},[r2,: 64]!
+	add r2,sp,#528
+	vld1.8 {d20-d21},[r2,: 128]
+	vadd.i64 q11,q5,q10
+	vsub.i64 q2,q2,q9
+	vshr.s64 q9,q11,#25
+	vext.32 d12,d5,d4,#0
+	vand q11,q11,q4
+	vadd.i64 q0,q0,q9
+	vmov d19,d7
+	vadd.i64 q3,q0,q7
+	vsub.i64 q5,q5,q11
+	vshr.s64 q11,q3,#26
+	vext.32 d18,d11,d10,#0
+	vand q3,q3,q1
+	vadd.i64 q8,q8,q11
+	vadd.i64 q11,q8,q10
+	vsub.i64 q0,q0,q3
+	vshr.s64 q3,q11,#25
+	vand q11,q11,q4
+	vadd.i64 q3,q6,q3
+	vadd.i64 q6,q3,q7
+	vsub.i64 q8,q8,q11
+	vshr.s64 q11,q6,#26
+	vand q6,q6,q1
+	vadd.i64 q9,q9,q11
+	vadd.i64 d25,d19,d21
+	vsub.i64 q3,q3,q6
+	vshr.s64 d23,d25,#25
+	vand q4,q12,q4
+	vadd.i64 d21,d23,d23
+	vshl.i64 d25,d23,#4
+	vadd.i64 d21,d21,d23
+	vadd.i64 d25,d25,d21
+	vadd.i64 d4,d4,d25
+	vzip.i32 q0,q8
+	vadd.i64 d12,d4,d14
+	add r2,r6,#8
+	vst1.8 d0,[r2,: 64]
+	vsub.i64 d19,d19,d9
+	add r2,r2,#16
+	vst1.8 d16,[r2,: 64]
+	vshr.s64 d22,d12,#26
+	vand q0,q6,q1
+	vadd.i64 d10,d10,d22
+	vzip.i32 q3,q9
+	vsub.i64 d4,d4,d0
+	sub r2,r2,#8
+	vst1.8 d6,[r2,: 64]
+	add r2,r2,#16
+	vst1.8 d18,[r2,: 64]
+	vzip.i32 q2,q5
+	sub r2,r2,#32
+	vst1.8 d4,[r2,: 64]
+	cmp r4,#0
+	beq .Lskippostcopy
+	add r2,r3,#144
+	mov r4,r4
+	vld1.8 {d0-d1},[r2,: 128]!
+	vld1.8 {d2-d3},[r2,: 128]!
+	vld1.8 {d4},[r2,: 64]
+	vst1.8 {d0-d1},[r4,: 128]!
+	vst1.8 {d2-d3},[r4,: 128]!
+	vst1.8 d4,[r4,: 64]
+	.Lskippostcopy:
+	cmp r1,#1
+	bne .Lskipfinalcopy
+	add r2,r3,#288
+	add r4,r3,#144
+	vld1.8 {d0-d1},[r2,: 128]!
+	vld1.8 {d2-d3},[r2,: 128]!
+	vld1.8 {d4},[r2,: 64]
+	vst1.8 {d0-d1},[r4,: 128]!
+	vst1.8 {d2-d3},[r4,: 128]!
+	vst1.8 d4,[r4,: 64]
+	.Lskipfinalcopy:
+	add r1,r1,#1
+	cmp r1,#12
+	blo .Linvertloop
+	add r1,r3,#144
+	ldr r2,[r1],#4
+	ldr r3,[r1],#4
+	ldr r4,[r1],#4
+	ldr r5,[r1],#4
+	ldr r6,[r1],#4
+	ldr r7,[r1],#4
+	ldr r8,[r1],#4
+	ldr r9,[r1],#4
+	ldr r10,[r1],#4
+	ldr r1,[r1]
+	add r11,r1,r1,LSL #4
+	add r11,r11,r1,LSL #1
+	add r11,r11,#16777216
+	mov r11,r11,ASR #25
+	add r11,r11,r2
+	mov r11,r11,ASR #26
+	add r11,r11,r3
+	mov r11,r11,ASR #25
+	add r11,r11,r4
+	mov r11,r11,ASR #26
+	add r11,r11,r5
+	mov r11,r11,ASR #25
+	add r11,r11,r6
+	mov r11,r11,ASR #26
+	add r11,r11,r7
+	mov r11,r11,ASR #25
+	add r11,r11,r8
+	mov r11,r11,ASR #26
+	add r11,r11,r9
+	mov r11,r11,ASR #25
+	add r11,r11,r10
+	mov r11,r11,ASR #26
+	add r11,r11,r1
+	mov r11,r11,ASR #25
+	add r2,r2,r11
+	add r2,r2,r11,LSL #1
+	add r2,r2,r11,LSL #4
+	mov r11,r2,ASR #26
+	add r3,r3,r11
+	sub r2,r2,r11,LSL #26
+	mov r11,r3,ASR #25
+	add r4,r4,r11
+	sub r3,r3,r11,LSL #25
+	mov r11,r4,ASR #26
+	add r5,r5,r11
+	sub r4,r4,r11,LSL #26
+	mov r11,r5,ASR #25
+	add r6,r6,r11
+	sub r5,r5,r11,LSL #25
+	mov r11,r6,ASR #26
+	add r7,r7,r11
+	sub r6,r6,r11,LSL #26
+	mov r11,r7,ASR #25
+	add r8,r8,r11
+	sub r7,r7,r11,LSL #25
+	mov r11,r8,ASR #26
+	add r9,r9,r11
+	sub r8,r8,r11,LSL #26
+	mov r11,r9,ASR #25
+	add r10,r10,r11
+	sub r9,r9,r11,LSL #25
+	mov r11,r10,ASR #26
+	add r1,r1,r11
+	sub r10,r10,r11,LSL #26
+	mov r11,r1,ASR #25
+	sub r1,r1,r11,LSL #25
+	add r2,r2,r3,LSL #26
+	mov r3,r3,LSR #6
+	add r3,r3,r4,LSL #19
+	mov r4,r4,LSR #13
+	add r4,r4,r5,LSL #13
+	mov r5,r5,LSR #19
+	add r5,r5,r6,LSL #6
+	add r6,r7,r8,LSL #25
+	mov r7,r8,LSR #7
+	add r7,r7,r9,LSL #19
+	mov r8,r9,LSR #13
+	add r8,r8,r10,LSL #12
+	mov r9,r10,LSR #20
+	add r1,r9,r1,LSL #6
+	str r2,[r0],#4
+	str r3,[r0],#4
+	str r4,[r0],#4
+	str r5,[r0],#4
+	str r6,[r0],#4
+	str r7,[r0],#4
+	str r8,[r0],#4
+	str r1,[r0]
+	ldrd r4,[sp,#0]
+	ldrd r6,[sp,#8]
+	ldrd r8,[sp,#16]
+	ldrd r10,[sp,#24]
+	ldr r12,[sp,#480]
+	ldr r14,[sp,#484]
+	ldr r0,=0
+	mov sp,r12
+	vpop {q4,q5,q6,q7}
+	bx lr
+ENDPROC(curve25519_neon)
+#endif
diff --git b/net/wireguard/crypto/curve25519-x86_64.S b/net/wireguard/crypto/curve25519-x86_64.S
new file mode 100644
index 0000000..4d26926
--- /dev/null
+++ b/net/wireguard/crypto/curve25519-x86_64.S
@@ -0,0 +1,3261 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ *
+ * Based on algorithms from Tung Chou <blueprint@crypto.tw>
+ */
+
+#include <linux/linkage.h>
+
+.data
+.align 16
+curve25519_sandy2x_v0_0: .quad 0, 0
+curve25519_sandy2x_v1_0: .quad 1, 0
+curve25519_sandy2x_v2_1: .quad 2, 1
+curve25519_sandy2x_v9_0: .quad 9, 0
+curve25519_sandy2x_v9_9: .quad 9, 9
+curve25519_sandy2x_v19_19: .quad 19, 19
+curve25519_sandy2x_v38_1: .quad 38, 1
+curve25519_sandy2x_v38_38: .quad 38, 38
+curve25519_sandy2x_v121666_121666: .quad 121666, 121666
+curve25519_sandy2x_m25: .quad 33554431, 33554431
+curve25519_sandy2x_m26: .quad 67108863, 67108863
+curve25519_sandy2x_subc0: .quad 0x07FFFFDA, 0x03FFFFFE
+curve25519_sandy2x_subc2: .quad 0x07FFFFFE, 0x03FFFFFE
+curve25519_sandy2x_REDMASK51: .quad 0x0007FFFFFFFFFFFF
+
+.text
+.align 32
+#ifdef CONFIG_AS_AVX
+ENTRY(curve25519_sandy2x_fe51_mul)
+	push %rbp
+	mov %rsp,%rbp
+	sub $96,%rsp
+	and $-32,%rsp
+	movq %r11,0(%rsp)
+	movq %r12,8(%rsp)
+	movq %r13,16(%rsp)
+	movq %r14,24(%rsp)
+	movq %r15,32(%rsp)
+	movq %rbx,40(%rsp)
+	movq %rbp,48(%rsp)
+	movq %rdi,56(%rsp)
+	mov %rdx,%rcx
+	movq 24(%rsi),%rdx
+	imulq $19,%rdx,%rax
+	movq %rax,64(%rsp)
+	mulq 16(%rcx)
+	mov %rax,%r8
+	mov %rdx,%r9
+	movq 32(%rsi),%rdx
+	imulq $19,%rdx,%rax
+	movq %rax,72(%rsp)
+	mulq 8(%rcx)
+	add %rax,%r8
+	adc %rdx,%r9
+	movq 0(%rsi),%rax
+	mulq 0(%rcx)
+	add %rax,%r8
+	adc %rdx,%r9
+	movq 0(%rsi),%rax
+	mulq 8(%rcx)
+	mov %rax,%r10
+	mov %rdx,%r11
+	movq 0(%rsi),%rax
+	mulq 16(%rcx)
+	mov %rax,%r12
+	mov %rdx,%r13
+	movq 0(%rsi),%rax
+	mulq 24(%rcx)
+	mov %rax,%r14
+	mov %rdx,%r15
+	movq 0(%rsi),%rax
+	mulq 32(%rcx)
+	mov %rax,%rbx
+	mov %rdx,%rbp
+	movq 8(%rsi),%rax
+	mulq 0(%rcx)
+	add %rax,%r10
+	adc %rdx,%r11
+	movq 8(%rsi),%rax
+	mulq 8(%rcx)
+	add %rax,%r12
+	adc %rdx,%r13
+	movq 8(%rsi),%rax
+	mulq 16(%rcx)
+	add %rax,%r14
+	adc %rdx,%r15
+	movq 8(%rsi),%rax
+	mulq 24(%rcx)
+	add %rax,%rbx
+	adc %rdx,%rbp
+	movq 8(%rsi),%rdx
+	imulq $19,%rdx,%rax
+	mulq 32(%rcx)
+	add %rax,%r8
+	adc %rdx,%r9
+	movq 16(%rsi),%rax
+	mulq 0(%rcx)
+	add %rax,%r12
+	adc %rdx,%r13
+	movq 16(%rsi),%rax
+	mulq 8(%rcx)
+	add %rax,%r14
+	adc %rdx,%r15
+	movq 16(%rsi),%rax
+	mulq 16(%rcx)
+	add %rax,%rbx
+	adc %rdx,%rbp
+	movq 16(%rsi),%rdx
+	imulq $19,%rdx,%rax
+	mulq 24(%rcx)
+	add %rax,%r8
+	adc %rdx,%r9
+	movq 16(%rsi),%rdx
+	imulq $19,%rdx,%rax
+	mulq 32(%rcx)
+	add %rax,%r10
+	adc %rdx,%r11
+	movq 24(%rsi),%rax
+	mulq 0(%rcx)
+	add %rax,%r14
+	adc %rdx,%r15
+	movq 24(%rsi),%rax
+	mulq 8(%rcx)
+	add %rax,%rbx
+	adc %rdx,%rbp
+	movq 64(%rsp),%rax
+	mulq 24(%rcx)
+	add %rax,%r10
+	adc %rdx,%r11
+	movq 64(%rsp),%rax
+	mulq 32(%rcx)
+	add %rax,%r12
+	adc %rdx,%r13
+	movq 32(%rsi),%rax
+	mulq 0(%rcx)
+	add %rax,%rbx
+	adc %rdx,%rbp
+	movq 72(%rsp),%rax
+	mulq 16(%rcx)
+	add %rax,%r10
+	adc %rdx,%r11
+	movq 72(%rsp),%rax
+	mulq 24(%rcx)
+	add %rax,%r12
+	adc %rdx,%r13
+	movq 72(%rsp),%rax
+	mulq 32(%rcx)
+	add %rax,%r14
+	adc %rdx,%r15
+	movq curve25519_sandy2x_REDMASK51(%rip),%rsi
+	shld $13,%r8,%r9
+	and %rsi,%r8
+	shld $13,%r10,%r11
+	and %rsi,%r10
+	add %r9,%r10
+	shld $13,%r12,%r13
+	and %rsi,%r12
+	add %r11,%r12
+	shld $13,%r14,%r15
+	and %rsi,%r14
+	add %r13,%r14
+	shld $13,%rbx,%rbp
+	and %rsi,%rbx
+	add %r15,%rbx
+	imulq $19,%rbp,%rdx
+	add %rdx,%r8
+	mov %r8,%rdx
+	shr $51,%rdx
+	add %r10,%rdx
+	mov %rdx,%rcx
+	shr $51,%rdx
+	and %rsi,%r8
+	add %r12,%rdx
+	mov %rdx,%r9
+	shr $51,%rdx
+	and %rsi,%rcx
+	add %r14,%rdx
+	mov %rdx,%rax
+	shr $51,%rdx
+	and %rsi,%r9
+	add %rbx,%rdx
+	mov %rdx,%r10
+	shr $51,%rdx
+	and %rsi,%rax
+	imulq $19,%rdx,%rdx
+	add %rdx,%r8
+	and %rsi,%r10
+	movq %r8,0(%rdi)
+	movq %rcx,8(%rdi)
+	movq %r9,16(%rdi)
+	movq %rax,24(%rdi)
+	movq %r10,32(%rdi)
+	movq 0(%rsp),%r11
+	movq 8(%rsp),%r12
+	movq 16(%rsp),%r13
+	movq 24(%rsp),%r14
+	movq 32(%rsp),%r15
+	movq 40(%rsp),%rbx
+	movq 48(%rsp),%rbp
+	leave
+	ret
+ENDPROC(curve25519_sandy2x_fe51_mul)
+
+.align 32
+ENTRY(curve25519_sandy2x_fe51_nsquare)
+	push %rbp
+	mov %rsp,%rbp
+	sub $64,%rsp
+	and $-32,%rsp
+	movq %r11,0(%rsp)
+	movq %r12,8(%rsp)
+	movq %r13,16(%rsp)
+	movq %r14,24(%rsp)
+	movq %r15,32(%rsp)
+	movq %rbx,40(%rsp)
+	movq %rbp,48(%rsp)
+	movq 0(%rsi),%rcx
+	movq 8(%rsi),%r8
+	movq 16(%rsi),%r9
+	movq 24(%rsi),%rax
+	movq 32(%rsi),%rsi
+	movq %r9,16(%rdi)
+	movq %rax,24(%rdi)
+	movq %rsi,32(%rdi)
+	mov %rdx,%rsi
+
+	.align 16
+	.Lloop:
+	sub $1,%rsi
+	mov %rcx,%rax
+	mul %rcx
+	add %rcx,%rcx
+	mov %rax,%r9
+	mov %rdx,%r10
+	mov %rcx,%rax
+	mul %r8
+	mov %rax,%r11
+	mov %rdx,%r12
+	mov %rcx,%rax
+	mulq 16(%rdi)
+	mov %rax,%r13
+	mov %rdx,%r14
+	mov %rcx,%rax
+	mulq 24(%rdi)
+	mov %rax,%r15
+	mov %rdx,%rbx
+	mov %rcx,%rax
+	mulq 32(%rdi)
+	mov %rax,%rcx
+	mov %rdx,%rbp
+	mov %r8,%rax
+	mul %r8
+	add %r8,%r8
+	add %rax,%r13
+	adc %rdx,%r14
+	mov %r8,%rax
+	mulq 16(%rdi)
+	add %rax,%r15
+	adc %rdx,%rbx
+	mov %r8,%rax
+	imulq $19, %r8,%r8
+	mulq 24(%rdi)
+	add %rax,%rcx
+	adc %rdx,%rbp
+	mov %r8,%rax
+	mulq 32(%rdi)
+	add %rax,%r9
+	adc %rdx,%r10
+	movq 16(%rdi),%rax
+	mulq 16(%rdi)
+	add %rax,%rcx
+	adc %rdx,%rbp
+	shld $13,%rcx,%rbp
+	movq 16(%rdi),%rax
+	imulq $38, %rax,%rax
+	mulq 24(%rdi)
+	add %rax,%r9
+	adc %rdx,%r10
+	shld $13,%r9,%r10
+	movq 16(%rdi),%rax
+	imulq $38, %rax,%rax
+	mulq 32(%rdi)
+	add %rax,%r11
+	adc %rdx,%r12
+	movq 24(%rdi),%rax
+	imulq $19, %rax,%rax
+	mulq 24(%rdi)
+	add %rax,%r11
+	adc %rdx,%r12
+	shld $13,%r11,%r12
+	movq 24(%rdi),%rax
+	imulq $38, %rax,%rax
+	mulq 32(%rdi)
+	add %rax,%r13
+	adc %rdx,%r14
+	shld $13,%r13,%r14
+	movq 32(%rdi),%rax
+	imulq $19, %rax,%rax
+	mulq 32(%rdi)
+	add %rax,%r15
+	adc %rdx,%rbx
+	shld $13,%r15,%rbx
+	movq curve25519_sandy2x_REDMASK51(%rip),%rdx
+	and %rdx,%rcx
+	add %rbx,%rcx
+	and %rdx,%r9
+	and %rdx,%r11
+	add %r10,%r11
+	and %rdx,%r13
+	add %r12,%r13
+	and %rdx,%r15
+	add %r14,%r15
+	imulq $19, %rbp,%rbp
+	lea (%r9,%rbp),%r9
+	mov %r9,%rax
+	shr $51,%r9
+	add %r11,%r9
+	and %rdx,%rax
+	mov %r9,%r8
+	shr $51,%r9
+	add %r13,%r9
+	and %rdx,%r8
+	mov %r9,%r10
+	shr $51,%r9
+	add %r15,%r9
+	and %rdx,%r10
+	movq %r10,16(%rdi)
+	mov %r9,%r10
+	shr $51,%r9
+	add %rcx,%r9
+	and %rdx,%r10
+	movq %r10,24(%rdi)
+	mov %r9,%r10
+	shr $51,%r9
+	imulq $19, %r9,%r9
+	lea (%rax,%r9),%rcx
+	and %rdx,%r10
+	movq %r10,32(%rdi)
+	cmp $0,%rsi
+	jne .Lloop
+
+	movq %rcx,0(%rdi)
+	movq %r8,8(%rdi)
+	movq 0(%rsp),%r11
+	movq 8(%rsp),%r12
+	movq 16(%rsp),%r13
+	movq 24(%rsp),%r14
+	movq 32(%rsp),%r15
+	movq 40(%rsp),%rbx
+	movq 48(%rsp),%rbp
+	leave
+	ret
+ENDPROC(curve25519_sandy2x_fe51_nsquare)
+
+.align 32
+ENTRY(curve25519_sandy2x_fe51_pack)
+	push %rbp
+	mov %rsp,%rbp
+	sub $32,%rsp
+	and $-32,%rsp
+	movq %r11,0(%rsp)
+	movq %r12,8(%rsp)
+	movq 0(%rsi),%rdx
+	movq 8(%rsi),%rcx
+	movq 16(%rsi),%r8
+	movq 24(%rsi),%r9
+	movq 32(%rsi),%rsi
+	movq curve25519_sandy2x_REDMASK51(%rip),%rax
+	lea -18(%rax),%r10
+	mov $3,%r11
+
+	.align 16
+	.Lreduceloop:
+	mov %rdx,%r12
+	shr $51,%r12
+	and %rax,%rdx
+	add %r12,%rcx
+	mov %rcx,%r12
+	shr $51,%r12
+	and %rax,%rcx
+	add %r12,%r8
+	mov %r8,%r12
+	shr $51,%r12
+	and %rax,%r8
+	add %r12,%r9
+	mov %r9,%r12
+	shr $51,%r12
+	and %rax,%r9
+	add %r12,%rsi
+	mov %rsi,%r12
+	shr $51,%r12
+	and %rax,%rsi
+	imulq $19, %r12,%r12
+	add %r12,%rdx
+	sub $1,%r11
+	ja .Lreduceloop
+
+	mov $1,%r12
+	cmp %r10,%rdx
+	cmovl %r11,%r12
+	cmp %rax,%rcx
+	cmovne %r11,%r12
+	cmp %rax,%r8
+	cmovne %r11,%r12
+	cmp %rax,%r9
+	cmovne %r11,%r12
+	cmp %rax,%rsi
+	cmovne %r11,%r12
+	neg %r12
+	and %r12,%rax
+	and %r12,%r10
+	sub %r10,%rdx
+	sub %rax,%rcx
+	sub %rax,%r8
+	sub %rax,%r9
+	sub %rax,%rsi
+	mov %rdx,%rax
+	and $0xFF,%eax
+	movb %al,0(%rdi)
+	mov %rdx,%rax
+	shr $8,%rax
+	and $0xFF,%eax
+	movb %al,1(%rdi)
+	mov %rdx,%rax
+	shr $16,%rax
+	and $0xFF,%eax
+	movb %al,2(%rdi)
+	mov %rdx,%rax
+	shr $24,%rax
+	and $0xFF,%eax
+	movb %al,3(%rdi)
+	mov %rdx,%rax
+	shr $32,%rax
+	and $0xFF,%eax
+	movb %al,4(%rdi)
+	mov %rdx,%rax
+	shr $40,%rax
+	and $0xFF,%eax
+	movb %al,5(%rdi)
+	mov %rdx,%rdx
+	shr $48,%rdx
+	mov %rcx,%rax
+	shl $3,%rax
+	and $0xF8,%eax
+	xor %rdx,%rax
+	movb %al,6(%rdi)
+	mov %rcx,%rdx
+	shr $5,%rdx
+	and $0xFF,%edx
+	movb %dl,7(%rdi)
+	mov %rcx,%rdx
+	shr $13,%rdx
+	and $0xFF,%edx
+	movb %dl,8(%rdi)
+	mov %rcx,%rdx
+	shr $21,%rdx
+	and $0xFF,%edx
+	movb %dl,9(%rdi)
+	mov %rcx,%rdx
+	shr $29,%rdx
+	and $0xFF,%edx
+	movb %dl,10(%rdi)
+	mov %rcx,%rdx
+	shr $37,%rdx
+	and $0xFF,%edx
+	movb %dl,11(%rdi)
+	mov %rcx,%rdx
+	shr $45,%rdx
+	mov %r8,%rcx
+	shl $6,%rcx
+	and $0xC0,%ecx
+	xor %rdx,%rcx
+	movb %cl,12(%rdi)
+	mov %r8,%rdx
+	shr $2,%rdx
+	and $0xFF,%edx
+	movb %dl,13(%rdi)
+	mov %r8,%rdx
+	shr $10,%rdx
+	and $0xFF,%edx
+	movb %dl,14(%rdi)
+	mov %r8,%rdx
+	shr $18,%rdx
+	and $0xFF,%edx
+	movb %dl,15(%rdi)
+	mov %r8,%rdx
+	shr $26,%rdx
+	and $0xFF,%edx
+	movb %dl,16(%rdi)
+	mov %r8,%rdx
+	shr $34,%rdx
+	and $0xFF,%edx
+	movb %dl,17(%rdi)
+	mov %r8,%rdx
+	shr $42,%rdx
+	movb %dl,18(%rdi)
+	mov %r8,%rdx
+	shr $50,%rdx
+	mov %r9,%rcx
+	shl $1,%rcx
+	and $0xFE,%ecx
+	xor %rdx,%rcx
+	movb %cl,19(%rdi)
+	mov %r9,%rdx
+	shr $7,%rdx
+	and $0xFF,%edx
+	movb %dl,20(%rdi)
+	mov %r9,%rdx
+	shr $15,%rdx
+	and $0xFF,%edx
+	movb %dl,21(%rdi)
+	mov %r9,%rdx
+	shr $23,%rdx
+	and $0xFF,%edx
+	movb %dl,22(%rdi)
+	mov %r9,%rdx
+	shr $31,%rdx
+	and $0xFF,%edx
+	movb %dl,23(%rdi)
+	mov %r9,%rdx
+	shr $39,%rdx
+	and $0xFF,%edx
+	movb %dl,24(%rdi)
+	mov %r9,%rdx
+	shr $47,%rdx
+	mov %rsi,%rcx
+	shl $4,%rcx
+	and $0xF0,%ecx
+	xor %rdx,%rcx
+	movb %cl,25(%rdi)
+	mov %rsi,%rdx
+	shr $4,%rdx
+	and $0xFF,%edx
+	movb %dl,26(%rdi)
+	mov %rsi,%rdx
+	shr $12,%rdx
+	and $0xFF,%edx
+	movb %dl,27(%rdi)
+	mov %rsi,%rdx
+	shr $20,%rdx
+	and $0xFF,%edx
+	movb %dl,28(%rdi)
+	mov %rsi,%rdx
+	shr $28,%rdx
+	and $0xFF,%edx
+	movb %dl,29(%rdi)
+	mov %rsi,%rdx
+	shr $36,%rdx
+	and $0xFF,%edx
+	movb %dl,30(%rdi)
+	mov %rsi,%rsi
+	shr $44,%rsi
+	movb %sil,31(%rdi)
+	movq 0(%rsp),%r11
+	movq 8(%rsp),%r12
+	leave
+	ret
+ENDPROC(curve25519_sandy2x_fe51_pack)
+
+.align 32
+ENTRY(curve25519_sandy2x_ladder)
+	push %rbp
+	mov %rsp,%rbp
+	sub $1856,%rsp
+	and $-32,%rsp
+	movq %r11,1824(%rsp)
+	movq %r12,1832(%rsp)
+	movq %r13,1840(%rsp)
+	movq %r14,1848(%rsp)
+	vmovdqa curve25519_sandy2x_v0_0(%rip),%xmm0
+	vmovdqa curve25519_sandy2x_v1_0(%rip),%xmm1
+	vmovdqu 0(%rdi),%xmm2
+	vmovdqa %xmm2,0(%rsp)
+	vmovdqu 16(%rdi),%xmm2
+	vmovdqa %xmm2,16(%rsp)
+	vmovdqu 32(%rdi),%xmm2
+	vmovdqa %xmm2,32(%rsp)
+	vmovdqu 48(%rdi),%xmm2
+	vmovdqa %xmm2,48(%rsp)
+	vmovdqu 64(%rdi),%xmm2
+	vmovdqa %xmm2,64(%rsp)
+	vmovdqa %xmm1,80(%rsp)
+	vmovdqa %xmm0,96(%rsp)
+	vmovdqa %xmm0,112(%rsp)
+	vmovdqa %xmm0,128(%rsp)
+	vmovdqa %xmm0,144(%rsp)
+	vmovdqa %xmm1,%xmm0
+	vpxor %xmm1,%xmm1,%xmm1
+	vpxor %xmm2,%xmm2,%xmm2
+	vpxor %xmm3,%xmm3,%xmm3
+	vpxor %xmm4,%xmm4,%xmm4
+	vpxor %xmm5,%xmm5,%xmm5
+	vpxor %xmm6,%xmm6,%xmm6
+	vpxor %xmm7,%xmm7,%xmm7
+	vpxor %xmm8,%xmm8,%xmm8
+	vpxor %xmm9,%xmm9,%xmm9
+	vmovdqu 0(%rdi),%xmm10
+	vmovdqa %xmm10,160(%rsp)
+	vmovdqu 16(%rdi),%xmm10
+	vmovdqa %xmm10,176(%rsp)
+	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm10,%xmm10
+	vmovdqa %xmm10,192(%rsp)
+	vmovdqu 32(%rdi),%xmm10
+	vmovdqa %xmm10,208(%rsp)
+	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm10,%xmm10
+	vmovdqa %xmm10,224(%rsp)
+	vmovdqu 48(%rdi),%xmm10
+	vmovdqa %xmm10,240(%rsp)
+	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm10,%xmm10
+	vmovdqa %xmm10,256(%rsp)
+	vmovdqu 64(%rdi),%xmm10
+	vmovdqa %xmm10,272(%rsp)
+	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm10,%xmm10
+	vmovdqa %xmm10,288(%rsp)
+	vmovdqu 8(%rdi),%xmm10
+	vpmuludq curve25519_sandy2x_v2_1(%rip),%xmm10,%xmm10
+	vmovdqa %xmm10,304(%rsp)
+	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm10,%xmm10
+	vmovdqa %xmm10,320(%rsp)
+	vmovdqu 24(%rdi),%xmm10
+	vpmuludq curve25519_sandy2x_v2_1(%rip),%xmm10,%xmm10
+	vmovdqa %xmm10,336(%rsp)
+	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm10,%xmm10
+	vmovdqa %xmm10,352(%rsp)
+	vmovdqu 40(%rdi),%xmm10
+	vpmuludq curve25519_sandy2x_v2_1(%rip),%xmm10,%xmm10
+	vmovdqa %xmm10,368(%rsp)
+	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm10,%xmm10
+	vmovdqa %xmm10,384(%rsp)
+	vmovdqu 56(%rdi),%xmm10
+	vpmuludq curve25519_sandy2x_v2_1(%rip),%xmm10,%xmm10
+	vmovdqa %xmm10,400(%rsp)
+	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm10,%xmm10
+	vmovdqa %xmm10,416(%rsp)
+	vmovdqu 0(%rdi),%xmm10
+	vmovdqu 64(%rdi),%xmm11
+	vblendps $12, %xmm11, %xmm10, %xmm10
+	vpshufd $2,%xmm10,%xmm10
+	vpmuludq curve25519_sandy2x_v38_1(%rip),%xmm10,%xmm10
+	vmovdqa %xmm10,432(%rsp)
+	movq 0(%rsi),%rdx
+	movq 8(%rsi),%rcx
+	movq 16(%rsi),%r8
+	movq 24(%rsi),%r9
+	shrd $1,%rcx,%rdx
+	shrd $1,%r8,%rcx
+	shrd $1,%r9,%r8
+	shr $1,%r9
+	xorq 0(%rsi),%rdx
+	xorq 8(%rsi),%rcx
+	xorq 16(%rsi),%r8
+	xorq 24(%rsi),%r9
+	leaq 800(%rsp),%rsi
+	mov $64,%rax
+
+	.align 16
+	.Lladder_small_loop:
+	mov %rdx,%r10
+	mov %rcx,%r11
+	mov %r8,%r12
+	mov %r9,%r13
+	shr $1,%rdx
+	shr $1,%rcx
+	shr $1,%r8
+	shr $1,%r9
+	and $1,%r10d
+	and $1,%r11d
+	and $1,%r12d
+	and $1,%r13d
+	neg %r10
+	neg %r11
+	neg %r12
+	neg %r13
+	movl %r10d,0(%rsi)
+	movl %r11d,256(%rsi)
+	movl %r12d,512(%rsi)
+	movl %r13d,768(%rsi)
+	add $4,%rsi
+	sub $1,%rax
+	jne .Lladder_small_loop
+	mov $255,%rdx
+	add $760,%rsi
+
+	.align 16
+	.Lladder_loop:
+	sub $1,%rdx
+	vbroadcastss 0(%rsi),%xmm10
+	sub $4,%rsi
+	vmovdqa 0(%rsp),%xmm11
+	vmovdqa 80(%rsp),%xmm12
+	vpxor %xmm11,%xmm0,%xmm13
+	vpand %xmm10,%xmm13,%xmm13
+	vpxor %xmm13,%xmm0,%xmm0
+	vpxor %xmm13,%xmm11,%xmm11
+	vpxor %xmm12,%xmm1,%xmm13
+	vpand %xmm10,%xmm13,%xmm13
+	vpxor %xmm13,%xmm1,%xmm1
+	vpxor %xmm13,%xmm12,%xmm12
+	vmovdqa 16(%rsp),%xmm13
+	vmovdqa 96(%rsp),%xmm14
+	vpxor %xmm13,%xmm2,%xmm15
+	vpand %xmm10,%xmm15,%xmm15
+	vpxor %xmm15,%xmm2,%xmm2
+	vpxor %xmm15,%xmm13,%xmm13
+	vpxor %xmm14,%xmm3,%xmm15
+	vpand %xmm10,%xmm15,%xmm15
+	vpxor %xmm15,%xmm3,%xmm3
+	vpxor %xmm15,%xmm14,%xmm14
+	vmovdqa %xmm13,0(%rsp)
+	vmovdqa %xmm14,16(%rsp)
+	vmovdqa 32(%rsp),%xmm13
+	vmovdqa 112(%rsp),%xmm14
+	vpxor %xmm13,%xmm4,%xmm15
+	vpand %xmm10,%xmm15,%xmm15
+	vpxor %xmm15,%xmm4,%xmm4
+	vpxor %xmm15,%xmm13,%xmm13
+	vpxor %xmm14,%xmm5,%xmm15
+	vpand %xmm10,%xmm15,%xmm15
+	vpxor %xmm15,%xmm5,%xmm5
+	vpxor %xmm15,%xmm14,%xmm14
+	vmovdqa %xmm13,32(%rsp)
+	vmovdqa %xmm14,80(%rsp)
+	vmovdqa 48(%rsp),%xmm13
+	vmovdqa 128(%rsp),%xmm14
+	vpxor %xmm13,%xmm6,%xmm15
+	vpand %xmm10,%xmm15,%xmm15
+	vpxor %xmm15,%xmm6,%xmm6
+	vpxor %xmm15,%xmm13,%xmm13
+	vpxor %xmm14,%xmm7,%xmm15
+	vpand %xmm10,%xmm15,%xmm15
+	vpxor %xmm15,%xmm7,%xmm7
+	vpxor %xmm15,%xmm14,%xmm14
+	vmovdqa %xmm13,48(%rsp)
+	vmovdqa %xmm14,96(%rsp)
+	vmovdqa 64(%rsp),%xmm13
+	vmovdqa 144(%rsp),%xmm14
+	vpxor %xmm13,%xmm8,%xmm15
+	vpand %xmm10,%xmm15,%xmm15
+	vpxor %xmm15,%xmm8,%xmm8
+	vpxor %xmm15,%xmm13,%xmm13
+	vpxor %xmm14,%xmm9,%xmm15
+	vpand %xmm10,%xmm15,%xmm15
+	vpxor %xmm15,%xmm9,%xmm9
+	vpxor %xmm15,%xmm14,%xmm14
+	vmovdqa %xmm13,64(%rsp)
+	vmovdqa %xmm14,112(%rsp)
+	vpaddq curve25519_sandy2x_subc0(%rip),%xmm11,%xmm10
+	vpsubq %xmm12,%xmm10,%xmm10
+	vpaddq %xmm12,%xmm11,%xmm11
+	vpunpckhqdq %xmm10,%xmm11,%xmm12
+	vpunpcklqdq %xmm10,%xmm11,%xmm10
+	vpaddq %xmm1,%xmm0,%xmm11
+	vpaddq curve25519_sandy2x_subc0(%rip),%xmm0,%xmm0
+	vpsubq %xmm1,%xmm0,%xmm0
+	vpunpckhqdq %xmm11,%xmm0,%xmm1
+	vpunpcklqdq %xmm11,%xmm0,%xmm0
+	vpmuludq %xmm0,%xmm10,%xmm11
+	vpmuludq %xmm1,%xmm10,%xmm13
+	vmovdqa %xmm1,128(%rsp)
+	vpaddq %xmm1,%xmm1,%xmm1
+	vpmuludq %xmm0,%xmm12,%xmm14
+	vmovdqa %xmm0,144(%rsp)
+	vpaddq %xmm14,%xmm13,%xmm13
+	vpmuludq %xmm1,%xmm12,%xmm0
+	vmovdqa %xmm1,448(%rsp)
+	vpaddq %xmm3,%xmm2,%xmm1
+	vpaddq curve25519_sandy2x_subc2(%rip),%xmm2,%xmm2
+	vpsubq %xmm3,%xmm2,%xmm2
+	vpunpckhqdq %xmm1,%xmm2,%xmm3
+	vpunpcklqdq %xmm1,%xmm2,%xmm1
+	vpmuludq %xmm1,%xmm10,%xmm2
+	vpaddq %xmm2,%xmm0,%xmm0
+	vpmuludq %xmm3,%xmm10,%xmm2
+	vmovdqa %xmm3,464(%rsp)
+	vpaddq %xmm3,%xmm3,%xmm3
+	vpmuludq %xmm1,%xmm12,%xmm14
+	vmovdqa %xmm1,480(%rsp)
+	vpaddq %xmm14,%xmm2,%xmm2
+	vpmuludq %xmm3,%xmm12,%xmm1
+	vmovdqa %xmm3,496(%rsp)
+	vpaddq %xmm5,%xmm4,%xmm3
+	vpaddq curve25519_sandy2x_subc2(%rip),%xmm4,%xmm4
+	vpsubq %xmm5,%xmm4,%xmm4
+	vpunpckhqdq %xmm3,%xmm4,%xmm5
+	vpunpcklqdq %xmm3,%xmm4,%xmm3
+	vpmuludq %xmm3,%xmm10,%xmm4
+	vpaddq %xmm4,%xmm1,%xmm1
+	vpmuludq %xmm5,%xmm10,%xmm4
+	vmovdqa %xmm5,512(%rsp)
+	vpaddq %xmm5,%xmm5,%xmm5
+	vpmuludq %xmm3,%xmm12,%xmm14
+	vmovdqa %xmm3,528(%rsp)
+	vpaddq %xmm14,%xmm4,%xmm4
+	vpaddq %xmm7,%xmm6,%xmm3
+	vpaddq curve25519_sandy2x_subc2(%rip),%xmm6,%xmm6
+	vpsubq %xmm7,%xmm6,%xmm6
+	vpunpckhqdq %xmm3,%xmm6,%xmm7
+	vpunpcklqdq %xmm3,%xmm6,%xmm3
+	vpmuludq %xmm3,%xmm10,%xmm6
+	vpmuludq %xmm5,%xmm12,%xmm14
+	vmovdqa %xmm5,544(%rsp)
+	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm5,%xmm5
+	vmovdqa %xmm5,560(%rsp)
+	vpaddq %xmm14,%xmm6,%xmm6
+	vpmuludq %xmm7,%xmm10,%xmm5
+	vmovdqa %xmm7,576(%rsp)
+	vpaddq %xmm7,%xmm7,%xmm7
+	vpmuludq %xmm3,%xmm12,%xmm14
+	vmovdqa %xmm3,592(%rsp)
+	vpaddq %xmm14,%xmm5,%xmm5
+	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm3,%xmm3
+	vmovdqa %xmm3,608(%rsp)
+	vpaddq %xmm9,%xmm8,%xmm3
+	vpaddq curve25519_sandy2x_subc2(%rip),%xmm8,%xmm8
+	vpsubq %xmm9,%xmm8,%xmm8
+	vpunpckhqdq %xmm3,%xmm8,%xmm9
+	vpunpcklqdq %xmm3,%xmm8,%xmm3
+	vmovdqa %xmm3,624(%rsp)
+	vpmuludq %xmm7,%xmm12,%xmm8
+	vmovdqa %xmm7,640(%rsp)
+	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm7,%xmm7
+	vmovdqa %xmm7,656(%rsp)
+	vpmuludq %xmm3,%xmm10,%xmm7
+	vpaddq %xmm7,%xmm8,%xmm8
+	vpmuludq %xmm9,%xmm10,%xmm7
+	vmovdqa %xmm9,672(%rsp)
+	vpaddq %xmm9,%xmm9,%xmm9
+	vpmuludq %xmm3,%xmm12,%xmm10
+	vpaddq %xmm10,%xmm7,%xmm7
+	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm3,%xmm3
+	vmovdqa %xmm3,688(%rsp)
+	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm12,%xmm12
+	vpmuludq %xmm9,%xmm12,%xmm3
+	vmovdqa %xmm9,704(%rsp)
+	vpaddq %xmm3,%xmm11,%xmm11
+	vmovdqa 0(%rsp),%xmm3
+	vmovdqa 16(%rsp),%xmm9
+	vpaddq curve25519_sandy2x_subc2(%rip),%xmm3,%xmm10
+	vpsubq %xmm9,%xmm10,%xmm10
+	vpaddq %xmm9,%xmm3,%xmm3
+	vpunpckhqdq %xmm10,%xmm3,%xmm9
+	vpunpcklqdq %xmm10,%xmm3,%xmm3
+	vpmuludq 144(%rsp),%xmm3,%xmm10
+	vpaddq %xmm10,%xmm0,%xmm0
+	vpmuludq 128(%rsp),%xmm3,%xmm10
+	vpaddq %xmm10,%xmm2,%xmm2
+	vpmuludq 480(%rsp),%xmm3,%xmm10
+	vpaddq %xmm10,%xmm1,%xmm1
+	vpmuludq 464(%rsp),%xmm3,%xmm10
+	vpaddq %xmm10,%xmm4,%xmm4
+	vpmuludq 528(%rsp),%xmm3,%xmm10
+	vpaddq %xmm10,%xmm6,%xmm6
+	vpmuludq 512(%rsp),%xmm3,%xmm10
+	vpaddq %xmm10,%xmm5,%xmm5
+	vpmuludq 592(%rsp),%xmm3,%xmm10
+	vpaddq %xmm10,%xmm8,%xmm8
+	vpmuludq 576(%rsp),%xmm3,%xmm10
+	vpaddq %xmm10,%xmm7,%xmm7
+	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm3,%xmm3
+	vpmuludq 624(%rsp),%xmm3,%xmm10
+	vpaddq %xmm10,%xmm11,%xmm11
+	vpmuludq 672(%rsp),%xmm3,%xmm3
+	vpaddq %xmm3,%xmm13,%xmm13
+	vpmuludq 144(%rsp),%xmm9,%xmm3
+	vpaddq %xmm3,%xmm2,%xmm2
+	vpmuludq 448(%rsp),%xmm9,%xmm3
+	vpaddq %xmm3,%xmm1,%xmm1
+	vpmuludq 480(%rsp),%xmm9,%xmm3
+	vpaddq %xmm3,%xmm4,%xmm4
+	vpmuludq 496(%rsp),%xmm9,%xmm3
+	vpaddq %xmm3,%xmm6,%xmm6
+	vpmuludq 528(%rsp),%xmm9,%xmm3
+	vpaddq %xmm3,%xmm5,%xmm5
+	vpmuludq 544(%rsp),%xmm9,%xmm3
+	vpaddq %xmm3,%xmm8,%xmm8
+	vpmuludq 592(%rsp),%xmm9,%xmm3
+	vpaddq %xmm3,%xmm7,%xmm7
+	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm9,%xmm9
+	vpmuludq 640(%rsp),%xmm9,%xmm3
+	vpaddq %xmm3,%xmm11,%xmm11
+	vpmuludq 624(%rsp),%xmm9,%xmm3
+	vpaddq %xmm3,%xmm13,%xmm13
+	vpmuludq 704(%rsp),%xmm9,%xmm9
+	vpaddq %xmm9,%xmm0,%xmm0
+	vmovdqa 32(%rsp),%xmm3
+	vmovdqa 80(%rsp),%xmm9
+	vpaddq curve25519_sandy2x_subc2(%rip),%xmm3,%xmm10
+	vpsubq %xmm9,%xmm10,%xmm10
+	vpaddq %xmm9,%xmm3,%xmm3
+	vpunpckhqdq %xmm10,%xmm3,%xmm9
+	vpunpcklqdq %xmm10,%xmm3,%xmm3
+	vpmuludq 144(%rsp),%xmm3,%xmm10
+	vpaddq %xmm10,%xmm1,%xmm1
+	vpmuludq 128(%rsp),%xmm3,%xmm10
+	vpaddq %xmm10,%xmm4,%xmm4
+	vpmuludq 480(%rsp),%xmm3,%xmm10
+	vpaddq %xmm10,%xmm6,%xmm6
+	vpmuludq 464(%rsp),%xmm3,%xmm10
+	vpaddq %xmm10,%xmm5,%xmm5
+	vpmuludq 528(%rsp),%xmm3,%xmm10
+	vpaddq %xmm10,%xmm8,%xmm8
+	vpmuludq 512(%rsp),%xmm3,%xmm10
+	vpaddq %xmm10,%xmm7,%xmm7
+	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm3,%xmm3
+	vpmuludq 592(%rsp),%xmm3,%xmm10
+	vpaddq %xmm10,%xmm11,%xmm11
+	vpmuludq 576(%rsp),%xmm3,%xmm10
+	vpaddq %xmm10,%xmm13,%xmm13
+	vpmuludq 624(%rsp),%xmm3,%xmm10
+	vpaddq %xmm10,%xmm0,%xmm0
+	vpmuludq 672(%rsp),%xmm3,%xmm3
+	vpaddq %xmm3,%xmm2,%xmm2
+	vpmuludq 144(%rsp),%xmm9,%xmm3
+	vpaddq %xmm3,%xmm4,%xmm4
+	vpmuludq 448(%rsp),%xmm9,%xmm3
+	vpaddq %xmm3,%xmm6,%xmm6
+	vpmuludq 480(%rsp),%xmm9,%xmm3
+	vpaddq %xmm3,%xmm5,%xmm5
+	vpmuludq 496(%rsp),%xmm9,%xmm3
+	vpaddq %xmm3,%xmm8,%xmm8
+	vpmuludq 528(%rsp),%xmm9,%xmm3
+	vpaddq %xmm3,%xmm7,%xmm7
+	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm9,%xmm9
+	vpmuludq 544(%rsp),%xmm9,%xmm3
+	vpaddq %xmm3,%xmm11,%xmm11
+	vpmuludq 592(%rsp),%xmm9,%xmm3
+	vpaddq %xmm3,%xmm13,%xmm13
+	vpmuludq 640(%rsp),%xmm9,%xmm3
+	vpaddq %xmm3,%xmm0,%xmm0
+	vpmuludq 624(%rsp),%xmm9,%xmm3
+	vpaddq %xmm3,%xmm2,%xmm2
+	vpmuludq 704(%rsp),%xmm9,%xmm9
+	vpaddq %xmm9,%xmm1,%xmm1
+	vmovdqa 48(%rsp),%xmm3
+	vmovdqa 96(%rsp),%xmm9
+	vpaddq curve25519_sandy2x_subc2(%rip),%xmm3,%xmm10
+	vpsubq %xmm9,%xmm10,%xmm10
+	vpaddq %xmm9,%xmm3,%xmm3
+	vpunpckhqdq %xmm10,%xmm3,%xmm9
+	vpunpcklqdq %xmm10,%xmm3,%xmm3
+	vpmuludq 144(%rsp),%xmm3,%xmm10
+	vpaddq %xmm10,%xmm6,%xmm6
+	vpmuludq 128(%rsp),%xmm3,%xmm10
+	vpaddq %xmm10,%xmm5,%xmm5
+	vpmuludq 480(%rsp),%xmm3,%xmm10
+	vpaddq %xmm10,%xmm8,%xmm8
+	vpmuludq 464(%rsp),%xmm3,%xmm10
+	vpaddq %xmm10,%xmm7,%xmm7
+	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm3,%xmm3
+	vpmuludq 528(%rsp),%xmm3,%xmm10
+	vpaddq %xmm10,%xmm11,%xmm11
+	vpmuludq 512(%rsp),%xmm3,%xmm10
+	vpaddq %xmm10,%xmm13,%xmm13
+	vpmuludq 592(%rsp),%xmm3,%xmm10
+	vpaddq %xmm10,%xmm0,%xmm0
+	vpmuludq 576(%rsp),%xmm3,%xmm10
+	vpaddq %xmm10,%xmm2,%xmm2
+	vpmuludq 624(%rsp),%xmm3,%xmm10
+	vpaddq %xmm10,%xmm1,%xmm1
+	vpmuludq 672(%rsp),%xmm3,%xmm3
+	vpaddq %xmm3,%xmm4,%xmm4
+	vpmuludq 144(%rsp),%xmm9,%xmm3
+	vpaddq %xmm3,%xmm5,%xmm5
+	vpmuludq 448(%rsp),%xmm9,%xmm3
+	vpaddq %xmm3,%xmm8,%xmm8
+	vpmuludq 480(%rsp),%xmm9,%xmm3
+	vpaddq %xmm3,%xmm7,%xmm7
+	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm9,%xmm9
+	vpmuludq 496(%rsp),%xmm9,%xmm3
+	vpaddq %xmm3,%xmm11,%xmm11
+	vpmuludq 528(%rsp),%xmm9,%xmm3
+	vpaddq %xmm3,%xmm13,%xmm13
+	vpmuludq 544(%rsp),%xmm9,%xmm3
+	vpaddq %xmm3,%xmm0,%xmm0
+	vpmuludq 592(%rsp),%xmm9,%xmm3
+	vpaddq %xmm3,%xmm2,%xmm2
+	vpmuludq 640(%rsp),%xmm9,%xmm3
+	vpaddq %xmm3,%xmm1,%xmm1
+	vpmuludq 624(%rsp),%xmm9,%xmm3
+	vpaddq %xmm3,%xmm4,%xmm4
+	vpmuludq 704(%rsp),%xmm9,%xmm9
+	vpaddq %xmm9,%xmm6,%xmm6
+	vmovdqa 64(%rsp),%xmm3
+	vmovdqa 112(%rsp),%xmm9
+	vpaddq curve25519_sandy2x_subc2(%rip),%xmm3,%xmm10
+	vpsubq %xmm9,%xmm10,%xmm10
+	vpaddq %xmm9,%xmm3,%xmm3
+	vpunpckhqdq %xmm10,%xmm3,%xmm9
+	vpunpcklqdq %xmm10,%xmm3,%xmm3
+	vpmuludq 144(%rsp),%xmm3,%xmm10
+	vpaddq %xmm10,%xmm8,%xmm8
+	vpmuludq 128(%rsp),%xmm3,%xmm10
+	vpaddq %xmm10,%xmm7,%xmm7
+	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm3,%xmm3
+	vpmuludq 480(%rsp),%xmm3,%xmm10
+	vpaddq %xmm10,%xmm11,%xmm11
+	vpmuludq 464(%rsp),%xmm3,%xmm10
+	vpaddq %xmm10,%xmm13,%xmm13
+	vpmuludq 528(%rsp),%xmm3,%xmm10
+	vpaddq %xmm10,%xmm0,%xmm0
+	vpmuludq 512(%rsp),%xmm3,%xmm10
+	vpaddq %xmm10,%xmm2,%xmm2
+	vpmuludq 592(%rsp),%xmm3,%xmm10
+	vpaddq %xmm10,%xmm1,%xmm1
+	vpmuludq 576(%rsp),%xmm3,%xmm10
+	vpaddq %xmm10,%xmm4,%xmm4
+	vpmuludq 624(%rsp),%xmm3,%xmm10
+	vpaddq %xmm10,%xmm6,%xmm6
+	vpmuludq 672(%rsp),%xmm3,%xmm3
+	vpaddq %xmm3,%xmm5,%xmm5
+	vpmuludq 144(%rsp),%xmm9,%xmm3
+	vpaddq %xmm3,%xmm7,%xmm7
+	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm9,%xmm9
+	vpmuludq 448(%rsp),%xmm9,%xmm3
+	vpaddq %xmm3,%xmm11,%xmm11
+	vpmuludq 480(%rsp),%xmm9,%xmm3
+	vpaddq %xmm3,%xmm13,%xmm13
+	vpmuludq 496(%rsp),%xmm9,%xmm3
+	vpaddq %xmm3,%xmm0,%xmm0
+	vpmuludq 528(%rsp),%xmm9,%xmm3
+	vpaddq %xmm3,%xmm2,%xmm2
+	vpmuludq 544(%rsp),%xmm9,%xmm3
+	vpaddq %xmm3,%xmm1,%xmm1
+	vpmuludq 592(%rsp),%xmm9,%xmm3
+	vpaddq %xmm3,%xmm4,%xmm4
+	vpmuludq 640(%rsp),%xmm9,%xmm3
+	vpaddq %xmm3,%xmm6,%xmm6
+	vpmuludq 624(%rsp),%xmm9,%xmm3
+	vpaddq %xmm3,%xmm5,%xmm5
+	vpmuludq 704(%rsp),%xmm9,%xmm9
+	vpaddq %xmm9,%xmm8,%xmm8
+	vpsrlq $25,%xmm4,%xmm3
+	vpaddq %xmm3,%xmm6,%xmm6
+	vpand curve25519_sandy2x_m25(%rip),%xmm4,%xmm4
+	vpsrlq $26,%xmm11,%xmm3
+	vpaddq %xmm3,%xmm13,%xmm13
+	vpand curve25519_sandy2x_m26(%rip),%xmm11,%xmm11
+	vpsrlq $26,%xmm6,%xmm3
+	vpaddq %xmm3,%xmm5,%xmm5
+	vpand curve25519_sandy2x_m26(%rip),%xmm6,%xmm6
+	vpsrlq $25,%xmm13,%xmm3
+	vpaddq %xmm3,%xmm0,%xmm0
+	vpand curve25519_sandy2x_m25(%rip),%xmm13,%xmm13
+	vpsrlq $25,%xmm5,%xmm3
+	vpaddq %xmm3,%xmm8,%xmm8
+	vpand curve25519_sandy2x_m25(%rip),%xmm5,%xmm5
+	vpsrlq $26,%xmm0,%xmm3
+	vpaddq %xmm3,%xmm2,%xmm2
+	vpand curve25519_sandy2x_m26(%rip),%xmm0,%xmm0
+	vpsrlq $26,%xmm8,%xmm3
+	vpaddq %xmm3,%xmm7,%xmm7
+	vpand curve25519_sandy2x_m26(%rip),%xmm8,%xmm8
+	vpsrlq $25,%xmm2,%xmm3
+	vpaddq %xmm3,%xmm1,%xmm1
+	vpand curve25519_sandy2x_m25(%rip),%xmm2,%xmm2
+	vpsrlq $25,%xmm7,%xmm3
+	vpsllq $4,%xmm3,%xmm9
+	vpaddq %xmm3,%xmm11,%xmm11
+	vpsllq $1,%xmm3,%xmm3
+	vpaddq %xmm3,%xmm9,%xmm9
+	vpaddq %xmm9,%xmm11,%xmm11
+	vpand curve25519_sandy2x_m25(%rip),%xmm7,%xmm7
+	vpsrlq $26,%xmm1,%xmm3
+	vpaddq %xmm3,%xmm4,%xmm4
+	vpand curve25519_sandy2x_m26(%rip),%xmm1,%xmm1
+	vpsrlq $26,%xmm11,%xmm3
+	vpaddq %xmm3,%xmm13,%xmm13
+	vpand curve25519_sandy2x_m26(%rip),%xmm11,%xmm11
+	vpsrlq $25,%xmm4,%xmm3
+	vpaddq %xmm3,%xmm6,%xmm6
+	vpand curve25519_sandy2x_m25(%rip),%xmm4,%xmm4
+	vpunpcklqdq %xmm13,%xmm11,%xmm3
+	vpunpckhqdq %xmm13,%xmm11,%xmm9
+	vpaddq curve25519_sandy2x_subc0(%rip),%xmm9,%xmm10
+	vpsubq %xmm3,%xmm10,%xmm10
+	vpaddq %xmm9,%xmm3,%xmm3
+	vpunpckhqdq %xmm3,%xmm10,%xmm9
+	vpunpcklqdq %xmm3,%xmm10,%xmm10
+	vpmuludq %xmm10,%xmm10,%xmm3
+	vpaddq %xmm10,%xmm10,%xmm10
+	vpmuludq %xmm9,%xmm10,%xmm11
+	vpunpcklqdq %xmm2,%xmm0,%xmm12
+	vpunpckhqdq %xmm2,%xmm0,%xmm0
+	vpaddq curve25519_sandy2x_subc2(%rip),%xmm0,%xmm2
+	vpsubq %xmm12,%xmm2,%xmm2
+	vpaddq %xmm0,%xmm12,%xmm12
+	vpunpckhqdq %xmm12,%xmm2,%xmm0
+	vpunpcklqdq %xmm12,%xmm2,%xmm2
+	vpmuludq %xmm2,%xmm10,%xmm12
+	vpaddq %xmm9,%xmm9,%xmm13
+	vpmuludq %xmm13,%xmm9,%xmm9
+	vpaddq %xmm9,%xmm12,%xmm12
+	vpmuludq %xmm0,%xmm10,%xmm9
+	vpmuludq %xmm2,%xmm13,%xmm14
+	vpaddq %xmm14,%xmm9,%xmm9
+	vpunpcklqdq %xmm4,%xmm1,%xmm14
+	vpunpckhqdq %xmm4,%xmm1,%xmm1
+	vpaddq curve25519_sandy2x_subc2(%rip),%xmm1,%xmm4
+	vpsubq %xmm14,%xmm4,%xmm4
+	vpaddq %xmm1,%xmm14,%xmm14
+	vpunpckhqdq %xmm14,%xmm4,%xmm1
+	vpunpcklqdq %xmm14,%xmm4,%xmm4
+	vmovdqa %xmm1,0(%rsp)
+	vpaddq %xmm1,%xmm1,%xmm1
+	vmovdqa %xmm1,16(%rsp)
+	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm1,%xmm1
+	vmovdqa %xmm1,32(%rsp)
+	vpmuludq %xmm4,%xmm10,%xmm1
+	vpmuludq %xmm2,%xmm2,%xmm14
+	vpaddq %xmm14,%xmm1,%xmm1
+	vpmuludq 0(%rsp),%xmm10,%xmm14
+	vpmuludq %xmm4,%xmm13,%xmm15
+	vpaddq %xmm15,%xmm14,%xmm14
+	vpunpcklqdq %xmm5,%xmm6,%xmm15
+	vpunpckhqdq %xmm5,%xmm6,%xmm5
+	vpaddq curve25519_sandy2x_subc2(%rip),%xmm5,%xmm6
+	vpsubq %xmm15,%xmm6,%xmm6
+	vpaddq %xmm5,%xmm15,%xmm15
+	vpunpckhqdq %xmm15,%xmm6,%xmm5
+	vpunpcklqdq %xmm15,%xmm6,%xmm6
+	vmovdqa %xmm6,48(%rsp)
+	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm6,%xmm6
+	vmovdqa %xmm6,64(%rsp)
+	vmovdqa %xmm5,80(%rsp)
+	vpmuludq curve25519_sandy2x_v38_38(%rip),%xmm5,%xmm5
+	vmovdqa %xmm5,96(%rsp)
+	vpmuludq 48(%rsp),%xmm10,%xmm5
+	vpaddq %xmm0,%xmm0,%xmm6
+	vpmuludq %xmm6,%xmm0,%xmm0
+	vpaddq %xmm0,%xmm5,%xmm5
+	vpmuludq 80(%rsp),%xmm10,%xmm0
+	vpmuludq %xmm4,%xmm6,%xmm15
+	vpaddq %xmm15,%xmm0,%xmm0
+	vpmuludq %xmm6,%xmm13,%xmm15
+	vpaddq %xmm15,%xmm1,%xmm1
+	vpmuludq %xmm6,%xmm2,%xmm15
+	vpaddq %xmm15,%xmm14,%xmm14
+	vpunpcklqdq %xmm7,%xmm8,%xmm15
+	vpunpckhqdq %xmm7,%xmm8,%xmm7
+	vpaddq curve25519_sandy2x_subc2(%rip),%xmm7,%xmm8
+	vpsubq %xmm15,%xmm8,%xmm8
+	vpaddq %xmm7,%xmm15,%xmm15
+	vpunpckhqdq %xmm15,%xmm8,%xmm7
+	vpunpcklqdq %xmm15,%xmm8,%xmm8
+	vmovdqa %xmm8,112(%rsp)
+	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm8,%xmm8
+	vmovdqa %xmm8,448(%rsp)
+	vpmuludq 112(%rsp),%xmm10,%xmm8
+	vpmuludq %xmm7,%xmm10,%xmm10
+	vpmuludq curve25519_sandy2x_v38_38(%rip),%xmm7,%xmm15
+	vpmuludq %xmm15,%xmm7,%xmm7
+	vpaddq %xmm7,%xmm8,%xmm8
+	vpmuludq %xmm15,%xmm13,%xmm7
+	vpaddq %xmm7,%xmm3,%xmm3
+	vpmuludq %xmm15,%xmm2,%xmm7
+	vpaddq %xmm7,%xmm11,%xmm11
+	vpmuludq 80(%rsp),%xmm13,%xmm7
+	vpaddq %xmm7,%xmm7,%xmm7
+	vpaddq %xmm7,%xmm8,%xmm8
+	vpmuludq 16(%rsp),%xmm13,%xmm7
+	vpaddq %xmm7,%xmm5,%xmm5
+	vpmuludq 48(%rsp),%xmm13,%xmm7
+	vpaddq %xmm7,%xmm0,%xmm0
+	vpmuludq 112(%rsp),%xmm13,%xmm7
+	vpaddq %xmm7,%xmm10,%xmm10
+	vpmuludq %xmm15,%xmm6,%xmm7
+	vpaddq %xmm7,%xmm12,%xmm12
+	vpmuludq %xmm15,%xmm4,%xmm7
+	vpaddq %xmm7,%xmm9,%xmm9
+	vpaddq %xmm2,%xmm2,%xmm2
+	vpmuludq %xmm4,%xmm2,%xmm7
+	vpaddq %xmm7,%xmm5,%xmm5
+	vpmuludq 448(%rsp),%xmm2,%xmm7
+	vpaddq %xmm7,%xmm3,%xmm3
+	vpmuludq 448(%rsp),%xmm6,%xmm7
+	vpaddq %xmm7,%xmm11,%xmm11
+	vpmuludq 0(%rsp),%xmm2,%xmm7
+	vpaddq %xmm7,%xmm0,%xmm0
+	vpmuludq 48(%rsp),%xmm2,%xmm7
+	vpaddq %xmm7,%xmm8,%xmm8
+	vpmuludq 80(%rsp),%xmm2,%xmm2
+	vpaddq %xmm2,%xmm10,%xmm10
+	vpmuludq 96(%rsp),%xmm4,%xmm2
+	vpaddq %xmm2,%xmm11,%xmm11
+	vpmuludq %xmm4,%xmm4,%xmm2
+	vpaddq %xmm2,%xmm8,%xmm8
+	vpaddq %xmm4,%xmm4,%xmm2
+	vpmuludq 448(%rsp),%xmm2,%xmm4
+	vpaddq %xmm4,%xmm12,%xmm12
+	vpmuludq 16(%rsp),%xmm15,%xmm4
+	vpaddq %xmm4,%xmm1,%xmm1
+	vpmuludq 48(%rsp),%xmm15,%xmm4
+	vpaddq %xmm4,%xmm14,%xmm14
+	vpmuludq 96(%rsp),%xmm6,%xmm4
+	vpaddq %xmm4,%xmm3,%xmm3
+	vmovdqa 16(%rsp),%xmm4
+	vpmuludq 448(%rsp),%xmm4,%xmm4
+	vpaddq %xmm4,%xmm9,%xmm9
+	vpmuludq 16(%rsp),%xmm6,%xmm4
+	vpaddq %xmm4,%xmm8,%xmm8
+	vpmuludq 48(%rsp),%xmm6,%xmm4
+	vpaddq %xmm4,%xmm10,%xmm10
+	vpmuludq 80(%rsp),%xmm15,%xmm4
+	vpaddq %xmm4,%xmm4,%xmm4
+	vpaddq %xmm4,%xmm5,%xmm5
+	vpmuludq 112(%rsp),%xmm15,%xmm4
+	vpaddq %xmm4,%xmm0,%xmm0
+	vmovdqa 48(%rsp),%xmm4
+	vpaddq %xmm4,%xmm4,%xmm4
+	vpmuludq 448(%rsp),%xmm4,%xmm4
+	vpaddq %xmm4,%xmm1,%xmm1
+	vmovdqa 80(%rsp),%xmm4
+	vpaddq %xmm4,%xmm4,%xmm4
+	vpmuludq 448(%rsp),%xmm4,%xmm4
+	vpaddq %xmm4,%xmm14,%xmm14
+	vpmuludq 64(%rsp),%xmm2,%xmm4
+	vpaddq %xmm4,%xmm3,%xmm3
+	vmovdqa 16(%rsp),%xmm4
+	vpmuludq 64(%rsp),%xmm4,%xmm4
+	vpaddq %xmm4,%xmm11,%xmm11
+	vmovdqa 16(%rsp),%xmm4
+	vpmuludq 96(%rsp),%xmm4,%xmm4
+	vpaddq %xmm4,%xmm12,%xmm12
+	vmovdqa 48(%rsp),%xmm4
+	vpmuludq 96(%rsp),%xmm4,%xmm4
+	vpaddq %xmm4,%xmm9,%xmm9
+	vpmuludq 0(%rsp),%xmm2,%xmm2
+	vpaddq %xmm2,%xmm10,%xmm10
+	vmovdqa 32(%rsp),%xmm2
+	vpmuludq 0(%rsp),%xmm2,%xmm2
+	vpaddq %xmm2,%xmm3,%xmm3
+	vmovdqa 64(%rsp),%xmm2
+	vpmuludq 48(%rsp),%xmm2,%xmm2
+	vpaddq %xmm2,%xmm12,%xmm12
+	vmovdqa 96(%rsp),%xmm2
+	vpmuludq 80(%rsp),%xmm2,%xmm2
+	vpaddq %xmm2,%xmm1,%xmm1
+	vmovdqa 448(%rsp),%xmm2
+	vpmuludq 112(%rsp),%xmm2,%xmm2
+	vpaddq %xmm2,%xmm5,%xmm5
+	vpsrlq $26,%xmm3,%xmm2
+	vpaddq %xmm2,%xmm11,%xmm11
+	vpand curve25519_sandy2x_m26(%rip),%xmm3,%xmm3
+	vpsrlq $25,%xmm14,%xmm2
+	vpaddq %xmm2,%xmm5,%xmm5
+	vpand curve25519_sandy2x_m25(%rip),%xmm14,%xmm14
+	vpsrlq $25,%xmm11,%xmm2
+	vpaddq %xmm2,%xmm12,%xmm12
+	vpand curve25519_sandy2x_m25(%rip),%xmm11,%xmm11
+	vpsrlq $26,%xmm5,%xmm2
+	vpaddq %xmm2,%xmm0,%xmm0
+	vpand curve25519_sandy2x_m26(%rip),%xmm5,%xmm5
+	vpsrlq $26,%xmm12,%xmm2
+	vpaddq %xmm2,%xmm9,%xmm9
+	vpand curve25519_sandy2x_m26(%rip),%xmm12,%xmm12
+	vpsrlq $25,%xmm0,%xmm2
+	vpaddq %xmm2,%xmm8,%xmm8
+	vpand curve25519_sandy2x_m25(%rip),%xmm0,%xmm0
+	vpsrlq $25,%xmm9,%xmm2
+	vpaddq %xmm2,%xmm1,%xmm1
+	vpand curve25519_sandy2x_m25(%rip),%xmm9,%xmm9
+	vpsrlq $26,%xmm8,%xmm2
+	vpaddq %xmm2,%xmm10,%xmm10
+	vpand curve25519_sandy2x_m26(%rip),%xmm8,%xmm8
+	vpsrlq $26,%xmm1,%xmm2
+	vpaddq %xmm2,%xmm14,%xmm14
+	vpand curve25519_sandy2x_m26(%rip),%xmm1,%xmm1
+	vpsrlq $25,%xmm10,%xmm2
+	vpsllq $4,%xmm2,%xmm4
+	vpaddq %xmm2,%xmm3,%xmm3
+	vpsllq $1,%xmm2,%xmm2
+	vpaddq %xmm2,%xmm4,%xmm4
+	vpaddq %xmm4,%xmm3,%xmm3
+	vpand curve25519_sandy2x_m25(%rip),%xmm10,%xmm10
+	vpsrlq $25,%xmm14,%xmm2
+	vpaddq %xmm2,%xmm5,%xmm5
+	vpand curve25519_sandy2x_m25(%rip),%xmm14,%xmm14
+	vpsrlq $26,%xmm3,%xmm2
+	vpaddq %xmm2,%xmm11,%xmm11
+	vpand curve25519_sandy2x_m26(%rip),%xmm3,%xmm3
+	vpunpckhqdq %xmm11,%xmm3,%xmm2
+	vmovdqa %xmm2,0(%rsp)
+	vpshufd $0,%xmm3,%xmm2
+	vpshufd $0,%xmm11,%xmm3
+	vpmuludq 160(%rsp),%xmm2,%xmm4
+	vpmuludq 432(%rsp),%xmm3,%xmm6
+	vpaddq %xmm6,%xmm4,%xmm4
+	vpmuludq 176(%rsp),%xmm2,%xmm6
+	vpmuludq 304(%rsp),%xmm3,%xmm7
+	vpaddq %xmm7,%xmm6,%xmm6
+	vpmuludq 208(%rsp),%xmm2,%xmm7
+	vpmuludq 336(%rsp),%xmm3,%xmm11
+	vpaddq %xmm11,%xmm7,%xmm7
+	vpmuludq 240(%rsp),%xmm2,%xmm11
+	vpmuludq 368(%rsp),%xmm3,%xmm13
+	vpaddq %xmm13,%xmm11,%xmm11
+	vpmuludq 272(%rsp),%xmm2,%xmm2
+	vpmuludq 400(%rsp),%xmm3,%xmm3
+	vpaddq %xmm3,%xmm2,%xmm2
+	vpunpckhqdq %xmm9,%xmm12,%xmm3
+	vmovdqa %xmm3,16(%rsp)
+	vpshufd $0,%xmm12,%xmm3
+	vpshufd $0,%xmm9,%xmm9
+	vpmuludq 288(%rsp),%xmm3,%xmm12
+	vpaddq %xmm12,%xmm4,%xmm4
+	vpmuludq 416(%rsp),%xmm9,%xmm12
+	vpaddq %xmm12,%xmm4,%xmm4
+	vpmuludq 160(%rsp),%xmm3,%xmm12
+	vpaddq %xmm12,%xmm6,%xmm6
+	vpmuludq 432(%rsp),%xmm9,%xmm12
+	vpaddq %xmm12,%xmm6,%xmm6
+	vpmuludq 176(%rsp),%xmm3,%xmm12
+	vpaddq %xmm12,%xmm7,%xmm7
+	vpmuludq 304(%rsp),%xmm9,%xmm12
+	vpaddq %xmm12,%xmm7,%xmm7
+	vpmuludq 208(%rsp),%xmm3,%xmm12
+	vpaddq %xmm12,%xmm11,%xmm11
+	vpmuludq 336(%rsp),%xmm9,%xmm12
+	vpaddq %xmm12,%xmm11,%xmm11
+	vpmuludq 240(%rsp),%xmm3,%xmm3
+	vpaddq %xmm3,%xmm2,%xmm2
+	vpmuludq 368(%rsp),%xmm9,%xmm3
+	vpaddq %xmm3,%xmm2,%xmm2
+	vpunpckhqdq %xmm14,%xmm1,%xmm3
+	vmovdqa %xmm3,32(%rsp)
+	vpshufd $0,%xmm1,%xmm1
+	vpshufd $0,%xmm14,%xmm3
+	vpmuludq 256(%rsp),%xmm1,%xmm9
+	vpaddq %xmm9,%xmm4,%xmm4
+	vpmuludq 384(%rsp),%xmm3,%xmm9
+	vpaddq %xmm9,%xmm4,%xmm4
+	vpmuludq 288(%rsp),%xmm1,%xmm9
+	vpaddq %xmm9,%xmm6,%xmm6
+	vpmuludq 416(%rsp),%xmm3,%xmm9
+	vpaddq %xmm9,%xmm6,%xmm6
+	vpmuludq 160(%rsp),%xmm1,%xmm9
+	vpaddq %xmm9,%xmm7,%xmm7
+	vpmuludq 432(%rsp),%xmm3,%xmm9
+	vpaddq %xmm9,%xmm7,%xmm7
+	vpmuludq 176(%rsp),%xmm1,%xmm9
+	vpaddq %xmm9,%xmm11,%xmm11
+	vpmuludq 304(%rsp),%xmm3,%xmm9
+	vpaddq %xmm9,%xmm11,%xmm11
+	vpmuludq 208(%rsp),%xmm1,%xmm1
+	vpaddq %xmm1,%xmm2,%xmm2
+	vpmuludq 336(%rsp),%xmm3,%xmm1
+	vpaddq %xmm1,%xmm2,%xmm2
+	vpunpckhqdq %xmm0,%xmm5,%xmm1
+	vmovdqa %xmm1,48(%rsp)
+	vpshufd $0,%xmm5,%xmm1
+	vpshufd $0,%xmm0,%xmm0
+	vpmuludq 224(%rsp),%xmm1,%xmm3
+	vpaddq %xmm3,%xmm4,%xmm4
+	vpmuludq 352(%rsp),%xmm0,%xmm3
+	vpaddq %xmm3,%xmm4,%xmm4
+	vpmuludq 256(%rsp),%xmm1,%xmm3
+	vpaddq %xmm3,%xmm6,%xmm6
+	vpmuludq 384(%rsp),%xmm0,%xmm3
+	vpaddq %xmm3,%xmm6,%xmm6
+	vpmuludq 288(%rsp),%xmm1,%xmm3
+	vpaddq %xmm3,%xmm7,%xmm7
+	vpmuludq 416(%rsp),%xmm0,%xmm3
+	vpaddq %xmm3,%xmm7,%xmm7
+	vpmuludq 160(%rsp),%xmm1,%xmm3
+	vpaddq %xmm3,%xmm11,%xmm11
+	vpmuludq 432(%rsp),%xmm0,%xmm3
+	vpaddq %xmm3,%xmm11,%xmm11
+	vpmuludq 176(%rsp),%xmm1,%xmm1
+	vpaddq %xmm1,%xmm2,%xmm2
+	vpmuludq 304(%rsp),%xmm0,%xmm0
+	vpaddq %xmm0,%xmm2,%xmm2
+	vpunpckhqdq %xmm10,%xmm8,%xmm0
+	vmovdqa %xmm0,64(%rsp)
+	vpshufd $0,%xmm8,%xmm0
+	vpshufd $0,%xmm10,%xmm1
+	vpmuludq 192(%rsp),%xmm0,%xmm3
+	vpaddq %xmm3,%xmm4,%xmm4
+	vpmuludq 320(%rsp),%xmm1,%xmm3
+	vpaddq %xmm3,%xmm4,%xmm4
+	vpmuludq 224(%rsp),%xmm0,%xmm3
+	vpaddq %xmm3,%xmm6,%xmm6
+	vpmuludq 352(%rsp),%xmm1,%xmm3
+	vpaddq %xmm3,%xmm6,%xmm6
+	vpmuludq 256(%rsp),%xmm0,%xmm3
+	vpaddq %xmm3,%xmm7,%xmm7
+	vpmuludq 384(%rsp),%xmm1,%xmm3
+	vpaddq %xmm3,%xmm7,%xmm7
+	vpmuludq 288(%rsp),%xmm0,%xmm3
+	vpaddq %xmm3,%xmm11,%xmm11
+	vpmuludq 416(%rsp),%xmm1,%xmm3
+	vpaddq %xmm3,%xmm11,%xmm11
+	vpmuludq 160(%rsp),%xmm0,%xmm0
+	vpaddq %xmm0,%xmm2,%xmm2
+	vpmuludq 432(%rsp),%xmm1,%xmm0
+	vpaddq %xmm0,%xmm2,%xmm2
+	vmovdqa %xmm4,80(%rsp)
+	vmovdqa %xmm6,96(%rsp)
+	vmovdqa %xmm7,112(%rsp)
+	vmovdqa %xmm11,448(%rsp)
+	vmovdqa %xmm2,496(%rsp)
+	vmovdqa 144(%rsp),%xmm0
+	vpmuludq %xmm0,%xmm0,%xmm1
+	vpaddq %xmm0,%xmm0,%xmm0
+	vmovdqa 128(%rsp),%xmm2
+	vpmuludq %xmm2,%xmm0,%xmm3
+	vmovdqa 480(%rsp),%xmm4
+	vpmuludq %xmm4,%xmm0,%xmm5
+	vmovdqa 464(%rsp),%xmm6
+	vpmuludq %xmm6,%xmm0,%xmm7
+	vmovdqa 528(%rsp),%xmm8
+	vpmuludq %xmm8,%xmm0,%xmm9
+	vpmuludq 512(%rsp),%xmm0,%xmm10
+	vpmuludq 592(%rsp),%xmm0,%xmm11
+	vpmuludq 576(%rsp),%xmm0,%xmm12
+	vpmuludq 624(%rsp),%xmm0,%xmm13
+	vmovdqa 672(%rsp),%xmm14
+	vpmuludq %xmm14,%xmm0,%xmm0
+	vpmuludq curve25519_sandy2x_v38_38(%rip),%xmm14,%xmm15
+	vpmuludq %xmm15,%xmm14,%xmm14
+	vpaddq %xmm14,%xmm13,%xmm13
+	vpaddq %xmm6,%xmm6,%xmm14
+	vpmuludq %xmm14,%xmm6,%xmm6
+	vpaddq %xmm6,%xmm11,%xmm11
+	vpaddq %xmm2,%xmm2,%xmm6
+	vpmuludq %xmm6,%xmm2,%xmm2
+	vpaddq %xmm2,%xmm5,%xmm5
+	vpmuludq %xmm15,%xmm6,%xmm2
+	vpaddq %xmm2,%xmm1,%xmm1
+	vpmuludq %xmm15,%xmm4,%xmm2
+	vpaddq %xmm2,%xmm3,%xmm3
+	vpmuludq 544(%rsp),%xmm6,%xmm2
+	vpaddq %xmm2,%xmm11,%xmm11
+	vpmuludq 592(%rsp),%xmm6,%xmm2
+	vpaddq %xmm2,%xmm12,%xmm12
+	vpmuludq 640(%rsp),%xmm6,%xmm2
+	vpaddq %xmm2,%xmm13,%xmm13
+	vpmuludq 624(%rsp),%xmm6,%xmm2
+	vpaddq %xmm2,%xmm0,%xmm0
+	vpmuludq %xmm4,%xmm6,%xmm2
+	vpaddq %xmm2,%xmm7,%xmm7
+	vpmuludq %xmm14,%xmm6,%xmm2
+	vpaddq %xmm2,%xmm9,%xmm9
+	vpmuludq %xmm8,%xmm6,%xmm2
+	vpaddq %xmm2,%xmm10,%xmm10
+	vpmuludq %xmm15,%xmm14,%xmm2
+	vpaddq %xmm2,%xmm5,%xmm5
+	vpmuludq %xmm15,%xmm8,%xmm2
+	vpaddq %xmm2,%xmm7,%xmm7
+	vpmuludq %xmm4,%xmm4,%xmm2
+	vpaddq %xmm2,%xmm9,%xmm9
+	vpmuludq %xmm14,%xmm4,%xmm2
+	vpaddq %xmm2,%xmm10,%xmm10
+	vpaddq %xmm4,%xmm4,%xmm2
+	vpmuludq %xmm8,%xmm2,%xmm4
+	vpaddq %xmm4,%xmm11,%xmm11
+	vpmuludq 688(%rsp),%xmm2,%xmm4
+	vpaddq %xmm4,%xmm1,%xmm1
+	vpmuludq 688(%rsp),%xmm14,%xmm4
+	vpaddq %xmm4,%xmm3,%xmm3
+	vpmuludq 512(%rsp),%xmm2,%xmm4
+	vpaddq %xmm4,%xmm12,%xmm12
+	vpmuludq 592(%rsp),%xmm2,%xmm4
+	vpaddq %xmm4,%xmm13,%xmm13
+	vpmuludq 576(%rsp),%xmm2,%xmm2
+	vpaddq %xmm2,%xmm0,%xmm0
+	vpmuludq 656(%rsp),%xmm8,%xmm2
+	vpaddq %xmm2,%xmm3,%xmm3
+	vpmuludq %xmm8,%xmm14,%xmm2
+	vpaddq %xmm2,%xmm12,%xmm12
+	vpmuludq %xmm8,%xmm8,%xmm2
+	vpaddq %xmm2,%xmm13,%xmm13
+	vpaddq %xmm8,%xmm8,%xmm2
+	vpmuludq 688(%rsp),%xmm2,%xmm4
+	vpaddq %xmm4,%xmm5,%xmm5
+	vpmuludq 544(%rsp),%xmm15,%xmm4
+	vpaddq %xmm4,%xmm9,%xmm9
+	vpmuludq 592(%rsp),%xmm15,%xmm4
+	vpaddq %xmm4,%xmm10,%xmm10
+	vpmuludq 656(%rsp),%xmm14,%xmm4
+	vpaddq %xmm4,%xmm1,%xmm1
+	vmovdqa 544(%rsp),%xmm4
+	vpmuludq 688(%rsp),%xmm4,%xmm4
+	vpaddq %xmm4,%xmm7,%xmm7
+	vpmuludq 544(%rsp),%xmm14,%xmm4
+	vpaddq %xmm4,%xmm13,%xmm13
+	vpmuludq 592(%rsp),%xmm14,%xmm4
+	vpaddq %xmm4,%xmm0,%xmm0
+	vpmuludq 640(%rsp),%xmm15,%xmm4
+	vpaddq %xmm4,%xmm11,%xmm11
+	vpmuludq 624(%rsp),%xmm15,%xmm4
+	vpaddq %xmm4,%xmm12,%xmm12
+	vmovdqa 592(%rsp),%xmm4
+	vpaddq %xmm4,%xmm4,%xmm4
+	vpmuludq 688(%rsp),%xmm4,%xmm4
+	vpaddq %xmm4,%xmm9,%xmm9
+	vpmuludq 608(%rsp),%xmm2,%xmm4
+	vpaddq %xmm4,%xmm1,%xmm1
+	vmovdqa 544(%rsp),%xmm4
+	vpmuludq 608(%rsp),%xmm4,%xmm4
+	vpaddq %xmm4,%xmm3,%xmm3
+	vmovdqa 544(%rsp),%xmm4
+	vpmuludq 656(%rsp),%xmm4,%xmm4
+	vpaddq %xmm4,%xmm5,%xmm5
+	vmovdqa 592(%rsp),%xmm4
+	vpmuludq 656(%rsp),%xmm4,%xmm4
+	vpaddq %xmm4,%xmm7,%xmm7
+	vmovdqa 640(%rsp),%xmm4
+	vpmuludq 688(%rsp),%xmm4,%xmm4
+	vpaddq %xmm4,%xmm10,%xmm10
+	vpmuludq 512(%rsp),%xmm2,%xmm2
+	vpaddq %xmm2,%xmm0,%xmm0
+	vmovdqa 560(%rsp),%xmm2
+	vpmuludq 512(%rsp),%xmm2,%xmm2
+	vpaddq %xmm2,%xmm1,%xmm1
+	vmovdqa 608(%rsp),%xmm2
+	vpmuludq 592(%rsp),%xmm2,%xmm2
+	vpaddq %xmm2,%xmm5,%xmm5
+	vmovdqa 656(%rsp),%xmm2
+	vpmuludq 576(%rsp),%xmm2,%xmm2
+	vpaddq %xmm2,%xmm9,%xmm9
+	vmovdqa 688(%rsp),%xmm2
+	vpmuludq 624(%rsp),%xmm2,%xmm2
+	vpaddq %xmm2,%xmm11,%xmm11
+	vpsrlq $26,%xmm1,%xmm2
+	vpaddq %xmm2,%xmm3,%xmm3
+	vpand curve25519_sandy2x_m26(%rip),%xmm1,%xmm1
+	vpsrlq $25,%xmm10,%xmm2
+	vpaddq %xmm2,%xmm11,%xmm11
+	vpand curve25519_sandy2x_m25(%rip),%xmm10,%xmm10
+	vpsrlq $25,%xmm3,%xmm2
+	vpaddq %xmm2,%xmm5,%xmm5
+	vpand curve25519_sandy2x_m25(%rip),%xmm3,%xmm3
+	vpsrlq $26,%xmm11,%xmm2
+	vpaddq %xmm2,%xmm12,%xmm12
+	vpand curve25519_sandy2x_m26(%rip),%xmm11,%xmm11
+	vpsrlq $26,%xmm5,%xmm2
+	vpaddq %xmm2,%xmm7,%xmm7
+	vpand curve25519_sandy2x_m26(%rip),%xmm5,%xmm5
+	vpsrlq $25,%xmm12,%xmm2
+	vpaddq %xmm2,%xmm13,%xmm13
+	vpand curve25519_sandy2x_m25(%rip),%xmm12,%xmm12
+	vpsrlq $25,%xmm7,%xmm2
+	vpaddq %xmm2,%xmm9,%xmm9
+	vpand curve25519_sandy2x_m25(%rip),%xmm7,%xmm7
+	vpsrlq $26,%xmm13,%xmm2
+	vpaddq %xmm2,%xmm0,%xmm0
+	vpand curve25519_sandy2x_m26(%rip),%xmm13,%xmm13
+	vpsrlq $26,%xmm9,%xmm2
+	vpaddq %xmm2,%xmm10,%xmm10
+	vpand curve25519_sandy2x_m26(%rip),%xmm9,%xmm9
+	vpsrlq $25,%xmm0,%xmm2
+	vpsllq $4,%xmm2,%xmm4
+	vpaddq %xmm2,%xmm1,%xmm1
+	vpsllq $1,%xmm2,%xmm2
+	vpaddq %xmm2,%xmm4,%xmm4
+	vpaddq %xmm4,%xmm1,%xmm1
+	vpand curve25519_sandy2x_m25(%rip),%xmm0,%xmm0
+	vpsrlq $25,%xmm10,%xmm2
+	vpaddq %xmm2,%xmm11,%xmm11
+	vpand curve25519_sandy2x_m25(%rip),%xmm10,%xmm10
+	vpsrlq $26,%xmm1,%xmm2
+	vpaddq %xmm2,%xmm3,%xmm3
+	vpand curve25519_sandy2x_m26(%rip),%xmm1,%xmm1
+	vpunpckhqdq %xmm3,%xmm1,%xmm2
+	vpunpcklqdq %xmm3,%xmm1,%xmm1
+	vmovdqa %xmm1,464(%rsp)
+	vpaddq curve25519_sandy2x_subc0(%rip),%xmm2,%xmm3
+	vpsubq %xmm1,%xmm3,%xmm3
+	vpunpckhqdq %xmm3,%xmm2,%xmm1
+	vpunpcklqdq %xmm3,%xmm2,%xmm2
+	vmovdqa %xmm2,480(%rsp)
+	vmovdqa %xmm1,512(%rsp)
+	vpsllq $1,%xmm1,%xmm1
+	vmovdqa %xmm1,528(%rsp)
+	vpmuludq curve25519_sandy2x_v121666_121666(%rip),%xmm3,%xmm3
+	vmovdqa 80(%rsp),%xmm1
+	vpunpcklqdq %xmm1,%xmm3,%xmm2
+	vpunpckhqdq %xmm1,%xmm3,%xmm1
+	vpunpckhqdq %xmm7,%xmm5,%xmm3
+	vpunpcklqdq %xmm7,%xmm5,%xmm4
+	vmovdqa %xmm4,544(%rsp)
+	vpaddq curve25519_sandy2x_subc2(%rip),%xmm3,%xmm5
+	vpsubq %xmm4,%xmm5,%xmm5
+	vpunpckhqdq %xmm5,%xmm3,%xmm4
+	vpunpcklqdq %xmm5,%xmm3,%xmm3
+	vmovdqa %xmm3,560(%rsp)
+	vmovdqa %xmm4,576(%rsp)
+	vpsllq $1,%xmm4,%xmm4
+	vmovdqa %xmm4,592(%rsp)
+	vpmuludq curve25519_sandy2x_v121666_121666(%rip),%xmm5,%xmm5
+	vmovdqa 96(%rsp),%xmm3
+	vpunpcklqdq %xmm3,%xmm5,%xmm4
+	vpunpckhqdq %xmm3,%xmm5,%xmm3
+	vpunpckhqdq %xmm10,%xmm9,%xmm5
+	vpunpcklqdq %xmm10,%xmm9,%xmm6
+	vmovdqa %xmm6,608(%rsp)
+	vpaddq curve25519_sandy2x_subc2(%rip),%xmm5,%xmm7
+	vpsubq %xmm6,%xmm7,%xmm7
+	vpunpckhqdq %xmm7,%xmm5,%xmm6
+	vpunpcklqdq %xmm7,%xmm5,%xmm5
+	vmovdqa %xmm5,624(%rsp)
+	vmovdqa %xmm6,640(%rsp)
+	vpsllq $1,%xmm6,%xmm6
+	vmovdqa %xmm6,656(%rsp)
+	vpmuludq curve25519_sandy2x_v121666_121666(%rip),%xmm7,%xmm7
+	vmovdqa 112(%rsp),%xmm5
+	vpunpcklqdq %xmm5,%xmm7,%xmm6
+	vpunpckhqdq %xmm5,%xmm7,%xmm5
+	vpunpckhqdq %xmm12,%xmm11,%xmm7
+	vpunpcklqdq %xmm12,%xmm11,%xmm8
+	vmovdqa %xmm8,672(%rsp)
+	vpaddq curve25519_sandy2x_subc2(%rip),%xmm7,%xmm9
+	vpsubq %xmm8,%xmm9,%xmm9
+	vpunpckhqdq %xmm9,%xmm7,%xmm8
+	vpunpcklqdq %xmm9,%xmm7,%xmm7
+	vmovdqa %xmm7,688(%rsp)
+	vmovdqa %xmm8,704(%rsp)
+	vpsllq $1,%xmm8,%xmm8
+	vmovdqa %xmm8,720(%rsp)
+	vpmuludq curve25519_sandy2x_v121666_121666(%rip),%xmm9,%xmm9
+	vmovdqa 448(%rsp),%xmm7
+	vpunpcklqdq %xmm7,%xmm9,%xmm8
+	vpunpckhqdq %xmm7,%xmm9,%xmm7
+	vpunpckhqdq %xmm0,%xmm13,%xmm9
+	vpunpcklqdq %xmm0,%xmm13,%xmm0
+	vmovdqa %xmm0,448(%rsp)
+	vpaddq curve25519_sandy2x_subc2(%rip),%xmm9,%xmm10
+	vpsubq %xmm0,%xmm10,%xmm10
+	vpunpckhqdq %xmm10,%xmm9,%xmm0
+	vpunpcklqdq %xmm10,%xmm9,%xmm9
+	vmovdqa %xmm9,736(%rsp)
+	vmovdqa %xmm0,752(%rsp)
+	vpsllq $1,%xmm0,%xmm0
+	vmovdqa %xmm0,768(%rsp)
+	vpmuludq curve25519_sandy2x_v121666_121666(%rip),%xmm10,%xmm10
+	vmovdqa 496(%rsp),%xmm0
+	vpunpcklqdq %xmm0,%xmm10,%xmm9
+	vpunpckhqdq %xmm0,%xmm10,%xmm0
+	vpsrlq $26,%xmm2,%xmm10
+	vpaddq %xmm10,%xmm1,%xmm1
+	vpand curve25519_sandy2x_m26(%rip),%xmm2,%xmm2
+	vpsrlq $25,%xmm5,%xmm10
+	vpaddq %xmm10,%xmm8,%xmm8
+	vpand curve25519_sandy2x_m25(%rip),%xmm5,%xmm5
+	vpsrlq $25,%xmm1,%xmm10
+	vpaddq %xmm10,%xmm4,%xmm4
+	vpand curve25519_sandy2x_m25(%rip),%xmm1,%xmm1
+	vpsrlq $26,%xmm8,%xmm10
+	vpaddq %xmm10,%xmm7,%xmm7
+	vpand curve25519_sandy2x_m26(%rip),%xmm8,%xmm8
+	vpsrlq $26,%xmm4,%xmm10
+	vpaddq %xmm10,%xmm3,%xmm3
+	vpand curve25519_sandy2x_m26(%rip),%xmm4,%xmm4
+	vpsrlq $25,%xmm7,%xmm10
+	vpaddq %xmm10,%xmm9,%xmm9
+	vpand curve25519_sandy2x_m25(%rip),%xmm7,%xmm7
+	vpsrlq $25,%xmm3,%xmm10
+	vpaddq %xmm10,%xmm6,%xmm6
+	vpand curve25519_sandy2x_m25(%rip),%xmm3,%xmm3
+	vpsrlq $26,%xmm9,%xmm10
+	vpaddq %xmm10,%xmm0,%xmm0
+	vpand curve25519_sandy2x_m26(%rip),%xmm9,%xmm9
+	vpsrlq $26,%xmm6,%xmm10
+	vpaddq %xmm10,%xmm5,%xmm5
+	vpand curve25519_sandy2x_m26(%rip),%xmm6,%xmm6
+	vpsrlq $25,%xmm0,%xmm10
+	vpsllq $4,%xmm10,%xmm11
+	vpaddq %xmm10,%xmm2,%xmm2
+	vpsllq $1,%xmm10,%xmm10
+	vpaddq %xmm10,%xmm11,%xmm11
+	vpaddq %xmm11,%xmm2,%xmm2
+	vpand curve25519_sandy2x_m25(%rip),%xmm0,%xmm0
+	vpsrlq $25,%xmm5,%xmm10
+	vpaddq %xmm10,%xmm8,%xmm8
+	vpand curve25519_sandy2x_m25(%rip),%xmm5,%xmm5
+	vpsrlq $26,%xmm2,%xmm10
+	vpaddq %xmm10,%xmm1,%xmm1
+	vpand curve25519_sandy2x_m26(%rip),%xmm2,%xmm2
+	vpunpckhqdq %xmm1,%xmm2,%xmm10
+	vmovdqa %xmm10,80(%rsp)
+	vpunpcklqdq %xmm1,%xmm2,%xmm1
+	vpunpckhqdq %xmm3,%xmm4,%xmm2
+	vmovdqa %xmm2,96(%rsp)
+	vpunpcklqdq %xmm3,%xmm4,%xmm2
+	vpunpckhqdq %xmm5,%xmm6,%xmm3
+	vmovdqa %xmm3,112(%rsp)
+	vpunpcklqdq %xmm5,%xmm6,%xmm3
+	vpunpckhqdq %xmm7,%xmm8,%xmm4
+	vmovdqa %xmm4,128(%rsp)
+	vpunpcklqdq %xmm7,%xmm8,%xmm4
+	vpunpckhqdq %xmm0,%xmm9,%xmm5
+	vmovdqa %xmm5,144(%rsp)
+	vpunpcklqdq %xmm0,%xmm9,%xmm0
+	vmovdqa 464(%rsp),%xmm5
+	vpaddq %xmm5,%xmm1,%xmm1
+	vpunpcklqdq %xmm1,%xmm5,%xmm6
+	vpunpckhqdq %xmm1,%xmm5,%xmm1
+	vpmuludq 512(%rsp),%xmm6,%xmm5
+	vpmuludq 480(%rsp),%xmm1,%xmm7
+	vpaddq %xmm7,%xmm5,%xmm5
+	vpmuludq 560(%rsp),%xmm6,%xmm7
+	vpmuludq 528(%rsp),%xmm1,%xmm8
+	vpaddq %xmm8,%xmm7,%xmm7
+	vpmuludq 576(%rsp),%xmm6,%xmm8
+	vpmuludq 560(%rsp),%xmm1,%xmm9
+	vpaddq %xmm9,%xmm8,%xmm8
+	vpmuludq 624(%rsp),%xmm6,%xmm9
+	vpmuludq 592(%rsp),%xmm1,%xmm10
+	vpaddq %xmm10,%xmm9,%xmm9
+	vpmuludq 640(%rsp),%xmm6,%xmm10
+	vpmuludq 624(%rsp),%xmm1,%xmm11
+	vpaddq %xmm11,%xmm10,%xmm10
+	vpmuludq 688(%rsp),%xmm6,%xmm11
+	vpmuludq 656(%rsp),%xmm1,%xmm12
+	vpaddq %xmm12,%xmm11,%xmm11
+	vpmuludq 704(%rsp),%xmm6,%xmm12
+	vpmuludq 688(%rsp),%xmm1,%xmm13
+	vpaddq %xmm13,%xmm12,%xmm12
+	vpmuludq 736(%rsp),%xmm6,%xmm13
+	vpmuludq 720(%rsp),%xmm1,%xmm14
+	vpaddq %xmm14,%xmm13,%xmm13
+	vpmuludq 752(%rsp),%xmm6,%xmm14
+	vpmuludq 736(%rsp),%xmm1,%xmm15
+	vpaddq %xmm15,%xmm14,%xmm14
+	vpmuludq 480(%rsp),%xmm6,%xmm6
+	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm1,%xmm1
+	vpmuludq 768(%rsp),%xmm1,%xmm1
+	vpaddq %xmm1,%xmm6,%xmm6
+	vmovdqa 544(%rsp),%xmm1
+	vpaddq %xmm1,%xmm2,%xmm2
+	vpunpcklqdq %xmm2,%xmm1,%xmm15
+	vpunpckhqdq %xmm2,%xmm1,%xmm1
+	vpmuludq 480(%rsp),%xmm15,%xmm2
+	vpaddq %xmm2,%xmm7,%xmm7
+	vpmuludq 512(%rsp),%xmm15,%xmm2
+	vpaddq %xmm2,%xmm8,%xmm8
+	vpmuludq 560(%rsp),%xmm15,%xmm2
+	vpaddq %xmm2,%xmm9,%xmm9
+	vpmuludq 576(%rsp),%xmm15,%xmm2
+	vpaddq %xmm2,%xmm10,%xmm10
+	vpmuludq 624(%rsp),%xmm15,%xmm2
+	vpaddq %xmm2,%xmm11,%xmm11
+	vpmuludq 640(%rsp),%xmm15,%xmm2
+	vpaddq %xmm2,%xmm12,%xmm12
+	vpmuludq 688(%rsp),%xmm15,%xmm2
+	vpaddq %xmm2,%xmm13,%xmm13
+	vpmuludq 704(%rsp),%xmm15,%xmm2
+	vpaddq %xmm2,%xmm14,%xmm14
+	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm15,%xmm15
+	vpmuludq 736(%rsp),%xmm15,%xmm2
+	vpaddq %xmm2,%xmm6,%xmm6
+	vpmuludq 752(%rsp),%xmm15,%xmm15
+	vpaddq %xmm15,%xmm5,%xmm5
+	vpmuludq 480(%rsp),%xmm1,%xmm2
+	vpaddq %xmm2,%xmm8,%xmm8
+	vpmuludq 528(%rsp),%xmm1,%xmm2
+	vpaddq %xmm2,%xmm9,%xmm9
+	vpmuludq 560(%rsp),%xmm1,%xmm2
+	vpaddq %xmm2,%xmm10,%xmm10
+	vpmuludq 592(%rsp),%xmm1,%xmm2
+	vpaddq %xmm2,%xmm11,%xmm11
+	vpmuludq 624(%rsp),%xmm1,%xmm2
+	vpaddq %xmm2,%xmm12,%xmm12
+	vpmuludq 656(%rsp),%xmm1,%xmm2
+	vpaddq %xmm2,%xmm13,%xmm13
+	vpmuludq 688(%rsp),%xmm1,%xmm2
+	vpaddq %xmm2,%xmm14,%xmm14
+	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm1,%xmm1
+	vpmuludq 720(%rsp),%xmm1,%xmm2
+	vpaddq %xmm2,%xmm6,%xmm6
+	vpmuludq 736(%rsp),%xmm1,%xmm2
+	vpaddq %xmm2,%xmm5,%xmm5
+	vpmuludq 768(%rsp),%xmm1,%xmm1
+	vpaddq %xmm1,%xmm7,%xmm7
+	vmovdqa 608(%rsp),%xmm1
+	vpaddq %xmm1,%xmm3,%xmm3
+	vpunpcklqdq %xmm3,%xmm1,%xmm2
+	vpunpckhqdq %xmm3,%xmm1,%xmm1
+	vpmuludq 480(%rsp),%xmm2,%xmm3
+	vpaddq %xmm3,%xmm9,%xmm9
+	vpmuludq 512(%rsp),%xmm2,%xmm3
+	vpaddq %xmm3,%xmm10,%xmm10
+	vpmuludq 560(%rsp),%xmm2,%xmm3
+	vpaddq %xmm3,%xmm11,%xmm11
+	vpmuludq 576(%rsp),%xmm2,%xmm3
+	vpaddq %xmm3,%xmm12,%xmm12
+	vpmuludq 624(%rsp),%xmm2,%xmm3
+	vpaddq %xmm3,%xmm13,%xmm13
+	vpmuludq 640(%rsp),%xmm2,%xmm3
+	vpaddq %xmm3,%xmm14,%xmm14
+	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm2,%xmm2
+	vpmuludq 688(%rsp),%xmm2,%xmm3
+	vpaddq %xmm3,%xmm6,%xmm6
+	vpmuludq 704(%rsp),%xmm2,%xmm3
+	vpaddq %xmm3,%xmm5,%xmm5
+	vpmuludq 736(%rsp),%xmm2,%xmm3
+	vpaddq %xmm3,%xmm7,%xmm7
+	vpmuludq 752(%rsp),%xmm2,%xmm2
+	vpaddq %xmm2,%xmm8,%xmm8
+	vpmuludq 480(%rsp),%xmm1,%xmm2
+	vpaddq %xmm2,%xmm10,%xmm10
+	vpmuludq 528(%rsp),%xmm1,%xmm2
+	vpaddq %xmm2,%xmm11,%xmm11
+	vpmuludq 560(%rsp),%xmm1,%xmm2
+	vpaddq %xmm2,%xmm12,%xmm12
+	vpmuludq 592(%rsp),%xmm1,%xmm2
+	vpaddq %xmm2,%xmm13,%xmm13
+	vpmuludq 624(%rsp),%xmm1,%xmm2
+	vpaddq %xmm2,%xmm14,%xmm14
+	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm1,%xmm1
+	vpmuludq 656(%rsp),%xmm1,%xmm2
+	vpaddq %xmm2,%xmm6,%xmm6
+	vpmuludq 688(%rsp),%xmm1,%xmm2
+	vpaddq %xmm2,%xmm5,%xmm5
+	vpmuludq 720(%rsp),%xmm1,%xmm2
+	vpaddq %xmm2,%xmm7,%xmm7
+	vpmuludq 736(%rsp),%xmm1,%xmm2
+	vpaddq %xmm2,%xmm8,%xmm8
+	vpmuludq 768(%rsp),%xmm1,%xmm1
+	vpaddq %xmm1,%xmm9,%xmm9
+	vmovdqa 672(%rsp),%xmm1
+	vpaddq %xmm1,%xmm4,%xmm4
+	vpunpcklqdq %xmm4,%xmm1,%xmm2
+	vpunpckhqdq %xmm4,%xmm1,%xmm1
+	vpmuludq 480(%rsp),%xmm2,%xmm3
+	vpaddq %xmm3,%xmm11,%xmm11
+	vpmuludq 512(%rsp),%xmm2,%xmm3
+	vpaddq %xmm3,%xmm12,%xmm12
+	vpmuludq 560(%rsp),%xmm2,%xmm3
+	vpaddq %xmm3,%xmm13,%xmm13
+	vpmuludq 576(%rsp),%xmm2,%xmm3
+	vpaddq %xmm3,%xmm14,%xmm14
+	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm2,%xmm2
+	vpmuludq 624(%rsp),%xmm2,%xmm3
+	vpaddq %xmm3,%xmm6,%xmm6
+	vpmuludq 640(%rsp),%xmm2,%xmm3
+	vpaddq %xmm3,%xmm5,%xmm5
+	vpmuludq 688(%rsp),%xmm2,%xmm3
+	vpaddq %xmm3,%xmm7,%xmm7
+	vpmuludq 704(%rsp),%xmm2,%xmm3
+	vpaddq %xmm3,%xmm8,%xmm8
+	vpmuludq 736(%rsp),%xmm2,%xmm3
+	vpaddq %xmm3,%xmm9,%xmm9
+	vpmuludq 752(%rsp),%xmm2,%xmm2
+	vpaddq %xmm2,%xmm10,%xmm10
+	vpmuludq 480(%rsp),%xmm1,%xmm2
+	vpaddq %xmm2,%xmm12,%xmm12
+	vpmuludq 528(%rsp),%xmm1,%xmm2
+	vpaddq %xmm2,%xmm13,%xmm13
+	vpmuludq 560(%rsp),%xmm1,%xmm2
+	vpaddq %xmm2,%xmm14,%xmm14
+	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm1,%xmm1
+	vpmuludq 592(%rsp),%xmm1,%xmm2
+	vpaddq %xmm2,%xmm6,%xmm6
+	vpmuludq 624(%rsp),%xmm1,%xmm2
+	vpaddq %xmm2,%xmm5,%xmm5
+	vpmuludq 656(%rsp),%xmm1,%xmm2
+	vpaddq %xmm2,%xmm7,%xmm7
+	vpmuludq 688(%rsp),%xmm1,%xmm2
+	vpaddq %xmm2,%xmm8,%xmm8
+	vpmuludq 720(%rsp),%xmm1,%xmm2
+	vpaddq %xmm2,%xmm9,%xmm9
+	vpmuludq 736(%rsp),%xmm1,%xmm2
+	vpaddq %xmm2,%xmm10,%xmm10
+	vpmuludq 768(%rsp),%xmm1,%xmm1
+	vpaddq %xmm1,%xmm11,%xmm11
+	vmovdqa 448(%rsp),%xmm1
+	vpaddq %xmm1,%xmm0,%xmm0
+	vpunpcklqdq %xmm0,%xmm1,%xmm2
+	vpunpckhqdq %xmm0,%xmm1,%xmm0
+	vpmuludq 480(%rsp),%xmm2,%xmm1
+	vpaddq %xmm1,%xmm13,%xmm13
+	vpmuludq 512(%rsp),%xmm2,%xmm1
+	vpaddq %xmm1,%xmm14,%xmm14
+	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm2,%xmm2
+	vpmuludq 560(%rsp),%xmm2,%xmm1
+	vpaddq %xmm1,%xmm6,%xmm6
+	vpmuludq 576(%rsp),%xmm2,%xmm1
+	vpaddq %xmm1,%xmm5,%xmm5
+	vpmuludq 624(%rsp),%xmm2,%xmm1
+	vpaddq %xmm1,%xmm7,%xmm7
+	vpmuludq 640(%rsp),%xmm2,%xmm1
+	vpaddq %xmm1,%xmm8,%xmm8
+	vpmuludq 688(%rsp),%xmm2,%xmm1
+	vpaddq %xmm1,%xmm9,%xmm9
+	vpmuludq 704(%rsp),%xmm2,%xmm1
+	vpaddq %xmm1,%xmm10,%xmm10
+	vpmuludq 736(%rsp),%xmm2,%xmm1
+	vpaddq %xmm1,%xmm11,%xmm11
+	vpmuludq 752(%rsp),%xmm2,%xmm2
+	vpaddq %xmm2,%xmm12,%xmm12
+	vpmuludq 480(%rsp),%xmm0,%xmm1
+	vpaddq %xmm1,%xmm14,%xmm14
+	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm0,%xmm0
+	vpmuludq 528(%rsp),%xmm0,%xmm1
+	vpaddq %xmm1,%xmm6,%xmm6
+	vpmuludq 560(%rsp),%xmm0,%xmm1
+	vpaddq %xmm1,%xmm5,%xmm5
+	vpmuludq 592(%rsp),%xmm0,%xmm1
+	vpaddq %xmm1,%xmm7,%xmm7
+	vpmuludq 624(%rsp),%xmm0,%xmm1
+	vpaddq %xmm1,%xmm8,%xmm8
+	vpmuludq 656(%rsp),%xmm0,%xmm1
+	vpaddq %xmm1,%xmm9,%xmm9
+	vpmuludq 688(%rsp),%xmm0,%xmm1
+	vpaddq %xmm1,%xmm10,%xmm10
+	vpmuludq 720(%rsp),%xmm0,%xmm1
+	vpaddq %xmm1,%xmm11,%xmm11
+	vpmuludq 736(%rsp),%xmm0,%xmm1
+	vpaddq %xmm1,%xmm12,%xmm12
+	vpmuludq 768(%rsp),%xmm0,%xmm0
+	vpaddq %xmm0,%xmm13,%xmm13
+	vpsrlq $26,%xmm6,%xmm0
+	vpaddq %xmm0,%xmm5,%xmm5
+	vpand curve25519_sandy2x_m26(%rip),%xmm6,%xmm6
+	vpsrlq $25,%xmm10,%xmm0
+	vpaddq %xmm0,%xmm11,%xmm11
+	vpand curve25519_sandy2x_m25(%rip),%xmm10,%xmm10
+	vpsrlq $25,%xmm5,%xmm0
+	vpaddq %xmm0,%xmm7,%xmm7
+	vpand curve25519_sandy2x_m25(%rip),%xmm5,%xmm5
+	vpsrlq $26,%xmm11,%xmm0
+	vpaddq %xmm0,%xmm12,%xmm12
+	vpand curve25519_sandy2x_m26(%rip),%xmm11,%xmm11
+	vpsrlq $26,%xmm7,%xmm0
+	vpaddq %xmm0,%xmm8,%xmm8
+	vpand curve25519_sandy2x_m26(%rip),%xmm7,%xmm7
+	vpsrlq $25,%xmm12,%xmm0
+	vpaddq %xmm0,%xmm13,%xmm13
+	vpand curve25519_sandy2x_m25(%rip),%xmm12,%xmm12
+	vpsrlq $25,%xmm8,%xmm0
+	vpaddq %xmm0,%xmm9,%xmm9
+	vpand curve25519_sandy2x_m25(%rip),%xmm8,%xmm8
+	vpsrlq $26,%xmm13,%xmm0
+	vpaddq %xmm0,%xmm14,%xmm14
+	vpand curve25519_sandy2x_m26(%rip),%xmm13,%xmm13
+	vpsrlq $26,%xmm9,%xmm0
+	vpaddq %xmm0,%xmm10,%xmm10
+	vpand curve25519_sandy2x_m26(%rip),%xmm9,%xmm9
+	vpsrlq $25,%xmm14,%xmm0
+	vpsllq $4,%xmm0,%xmm1
+	vpaddq %xmm0,%xmm6,%xmm6
+	vpsllq $1,%xmm0,%xmm0
+	vpaddq %xmm0,%xmm1,%xmm1
+	vpaddq %xmm1,%xmm6,%xmm6
+	vpand curve25519_sandy2x_m25(%rip),%xmm14,%xmm14
+	vpsrlq $25,%xmm10,%xmm0
+	vpaddq %xmm0,%xmm11,%xmm11
+	vpand curve25519_sandy2x_m25(%rip),%xmm10,%xmm10
+	vpsrlq $26,%xmm6,%xmm0
+	vpaddq %xmm0,%xmm5,%xmm5
+	vpand curve25519_sandy2x_m26(%rip),%xmm6,%xmm6
+	vpunpckhqdq %xmm5,%xmm6,%xmm1
+	vpunpcklqdq %xmm5,%xmm6,%xmm0
+	vpunpckhqdq %xmm8,%xmm7,%xmm3
+	vpunpcklqdq %xmm8,%xmm7,%xmm2
+	vpunpckhqdq %xmm10,%xmm9,%xmm5
+	vpunpcklqdq %xmm10,%xmm9,%xmm4
+	vpunpckhqdq %xmm12,%xmm11,%xmm7
+	vpunpcklqdq %xmm12,%xmm11,%xmm6
+	vpunpckhqdq %xmm14,%xmm13,%xmm9
+	vpunpcklqdq %xmm14,%xmm13,%xmm8
+	cmp $0,%rdx
+	jne .Lladder_loop
+	vmovdqu %xmm1,160(%rdi)
+	vmovdqu %xmm0,80(%rdi)
+	vmovdqu %xmm3,176(%rdi)
+	vmovdqu %xmm2,96(%rdi)
+	vmovdqu %xmm5,192(%rdi)
+	vmovdqu %xmm4,112(%rdi)
+	vmovdqu %xmm7,208(%rdi)
+	vmovdqu %xmm6,128(%rdi)
+	vmovdqu %xmm9,224(%rdi)
+	vmovdqu %xmm8,144(%rdi)
+	movq 1824(%rsp),%r11
+	movq 1832(%rsp),%r12
+	movq 1840(%rsp),%r13
+	movq 1848(%rsp),%r14
+	leave
+	ret
+ENDPROC(curve25519_sandy2x_ladder)
+
+.align 32
+ENTRY(curve25519_sandy2x_ladder_base)
+	push %rbp
+	mov %rsp,%rbp
+	sub $1568,%rsp
+	and $-32,%rsp
+	movq %r11,1536(%rsp)
+	movq %r12,1544(%rsp)
+	movq %r13,1552(%rsp)
+	vmovdqa curve25519_sandy2x_v0_0(%rip),%xmm0
+	vmovdqa curve25519_sandy2x_v1_0(%rip),%xmm1
+	vmovdqa curve25519_sandy2x_v9_0(%rip),%xmm2
+	vmovdqa %xmm2,0(%rsp)
+	vmovdqa %xmm0,16(%rsp)
+	vmovdqa %xmm0,32(%rsp)
+	vmovdqa %xmm0,48(%rsp)
+	vmovdqa %xmm0,64(%rsp)
+	vmovdqa %xmm1,80(%rsp)
+	vmovdqa %xmm0,96(%rsp)
+	vmovdqa %xmm0,112(%rsp)
+	vmovdqa %xmm0,128(%rsp)
+	vmovdqa %xmm0,144(%rsp)
+	vmovdqa %xmm1,%xmm0
+	vpxor %xmm1,%xmm1,%xmm1
+	vpxor %xmm2,%xmm2,%xmm2
+	vpxor %xmm3,%xmm3,%xmm3
+	vpxor %xmm4,%xmm4,%xmm4
+	vpxor %xmm5,%xmm5,%xmm5
+	vpxor %xmm6,%xmm6,%xmm6
+	vpxor %xmm7,%xmm7,%xmm7
+	vpxor %xmm8,%xmm8,%xmm8
+	vpxor %xmm9,%xmm9,%xmm9
+	movq 0(%rsi),%rdx
+	movq 8(%rsi),%rcx
+	movq 16(%rsi),%r8
+	movq 24(%rsi),%r9
+	shrd $1,%rcx,%rdx
+	shrd $1,%r8,%rcx
+	shrd $1,%r9,%r8
+	shr $1,%r9
+	xorq 0(%rsi),%rdx
+	xorq 8(%rsi),%rcx
+	xorq 16(%rsi),%r8
+	xorq 24(%rsi),%r9
+	leaq 512(%rsp),%rsi
+	mov $64,%rax
+
+	.align 16
+	.Lladder_base_small_loop:
+	mov %rdx,%r10
+	mov %rcx,%r11
+	mov %r8,%r12
+	mov %r9,%r13
+	shr $1,%rdx
+	shr $1,%rcx
+	shr $1,%r8
+	shr $1,%r9
+	and $1,%r10d
+	and $1,%r11d
+	and $1,%r12d
+	and $1,%r13d
+	neg %r10
+	neg %r11
+	neg %r12
+	neg %r13
+	movl %r10d,0(%rsi)
+	movl %r11d,256(%rsi)
+	movl %r12d,512(%rsi)
+	movl %r13d,768(%rsi)
+	add $4,%rsi
+	sub $1,%rax
+	jne .Lladder_base_small_loop
+	mov $255,%rdx
+	add $760,%rsi
+
+	.align 16
+	.Lladder_base_loop:
+	sub $1,%rdx
+	vbroadcastss 0(%rsi),%xmm10
+	sub $4,%rsi
+	vmovdqa 0(%rsp),%xmm11
+	vmovdqa 80(%rsp),%xmm12
+	vpxor %xmm11,%xmm0,%xmm13
+	vpand %xmm10,%xmm13,%xmm13
+	vpxor %xmm13,%xmm0,%xmm0
+	vpxor %xmm13,%xmm11,%xmm11
+	vpxor %xmm12,%xmm1,%xmm13
+	vpand %xmm10,%xmm13,%xmm13
+	vpxor %xmm13,%xmm1,%xmm1
+	vpxor %xmm13,%xmm12,%xmm12
+	vmovdqa 16(%rsp),%xmm13
+	vmovdqa 96(%rsp),%xmm14
+	vpxor %xmm13,%xmm2,%xmm15
+	vpand %xmm10,%xmm15,%xmm15
+	vpxor %xmm15,%xmm2,%xmm2
+	vpxor %xmm15,%xmm13,%xmm13
+	vpxor %xmm14,%xmm3,%xmm15
+	vpand %xmm10,%xmm15,%xmm15
+	vpxor %xmm15,%xmm3,%xmm3
+	vpxor %xmm15,%xmm14,%xmm14
+	vmovdqa %xmm13,0(%rsp)
+	vmovdqa %xmm14,16(%rsp)
+	vmovdqa 32(%rsp),%xmm13
+	vmovdqa 112(%rsp),%xmm14
+	vpxor %xmm13,%xmm4,%xmm15
+	vpand %xmm10,%xmm15,%xmm15
+	vpxor %xmm15,%xmm4,%xmm4
+	vpxor %xmm15,%xmm13,%xmm13
+	vpxor %xmm14,%xmm5,%xmm15
+	vpand %xmm10,%xmm15,%xmm15
+	vpxor %xmm15,%xmm5,%xmm5
+	vpxor %xmm15,%xmm14,%xmm14
+	vmovdqa %xmm13,32(%rsp)
+	vmovdqa %xmm14,80(%rsp)
+	vmovdqa 48(%rsp),%xmm13
+	vmovdqa 128(%rsp),%xmm14
+	vpxor %xmm13,%xmm6,%xmm15
+	vpand %xmm10,%xmm15,%xmm15
+	vpxor %xmm15,%xmm6,%xmm6
+	vpxor %xmm15,%xmm13,%xmm13
+	vpxor %xmm14,%xmm7,%xmm15
+	vpand %xmm10,%xmm15,%xmm15
+	vpxor %xmm15,%xmm7,%xmm7
+	vpxor %xmm15,%xmm14,%xmm14
+	vmovdqa %xmm13,48(%rsp)
+	vmovdqa %xmm14,96(%rsp)
+	vmovdqa 64(%rsp),%xmm13
+	vmovdqa 144(%rsp),%xmm14
+	vpxor %xmm13,%xmm8,%xmm15
+	vpand %xmm10,%xmm15,%xmm15
+	vpxor %xmm15,%xmm8,%xmm8
+	vpxor %xmm15,%xmm13,%xmm13
+	vpxor %xmm14,%xmm9,%xmm15
+	vpand %xmm10,%xmm15,%xmm15
+	vpxor %xmm15,%xmm9,%xmm9
+	vpxor %xmm15,%xmm14,%xmm14
+	vmovdqa %xmm13,64(%rsp)
+	vmovdqa %xmm14,112(%rsp)
+	vpaddq curve25519_sandy2x_subc0(%rip),%xmm11,%xmm10
+	vpsubq %xmm12,%xmm10,%xmm10
+	vpaddq %xmm12,%xmm11,%xmm11
+	vpunpckhqdq %xmm10,%xmm11,%xmm12
+	vpunpcklqdq %xmm10,%xmm11,%xmm10
+	vpaddq %xmm1,%xmm0,%xmm11
+	vpaddq curve25519_sandy2x_subc0(%rip),%xmm0,%xmm0
+	vpsubq %xmm1,%xmm0,%xmm0
+	vpunpckhqdq %xmm11,%xmm0,%xmm1
+	vpunpcklqdq %xmm11,%xmm0,%xmm0
+	vpmuludq %xmm0,%xmm10,%xmm11
+	vpmuludq %xmm1,%xmm10,%xmm13
+	vmovdqa %xmm1,128(%rsp)
+	vpaddq %xmm1,%xmm1,%xmm1
+	vpmuludq %xmm0,%xmm12,%xmm14
+	vmovdqa %xmm0,144(%rsp)
+	vpaddq %xmm14,%xmm13,%xmm13
+	vpmuludq %xmm1,%xmm12,%xmm0
+	vmovdqa %xmm1,160(%rsp)
+	vpaddq %xmm3,%xmm2,%xmm1
+	vpaddq curve25519_sandy2x_subc2(%rip),%xmm2,%xmm2
+	vpsubq %xmm3,%xmm2,%xmm2
+	vpunpckhqdq %xmm1,%xmm2,%xmm3
+	vpunpcklqdq %xmm1,%xmm2,%xmm1
+	vpmuludq %xmm1,%xmm10,%xmm2
+	vpaddq %xmm2,%xmm0,%xmm0
+	vpmuludq %xmm3,%xmm10,%xmm2
+	vmovdqa %xmm3,176(%rsp)
+	vpaddq %xmm3,%xmm3,%xmm3
+	vpmuludq %xmm1,%xmm12,%xmm14
+	vmovdqa %xmm1,192(%rsp)
+	vpaddq %xmm14,%xmm2,%xmm2
+	vpmuludq %xmm3,%xmm12,%xmm1
+	vmovdqa %xmm3,208(%rsp)
+	vpaddq %xmm5,%xmm4,%xmm3
+	vpaddq curve25519_sandy2x_subc2(%rip),%xmm4,%xmm4
+	vpsubq %xmm5,%xmm4,%xmm4
+	vpunpckhqdq %xmm3,%xmm4,%xmm5
+	vpunpcklqdq %xmm3,%xmm4,%xmm3
+	vpmuludq %xmm3,%xmm10,%xmm4
+	vpaddq %xmm4,%xmm1,%xmm1
+	vpmuludq %xmm5,%xmm10,%xmm4
+	vmovdqa %xmm5,224(%rsp)
+	vpaddq %xmm5,%xmm5,%xmm5
+	vpmuludq %xmm3,%xmm12,%xmm14
+	vmovdqa %xmm3,240(%rsp)
+	vpaddq %xmm14,%xmm4,%xmm4
+	vpaddq %xmm7,%xmm6,%xmm3
+	vpaddq curve25519_sandy2x_subc2(%rip),%xmm6,%xmm6
+	vpsubq %xmm7,%xmm6,%xmm6
+	vpunpckhqdq %xmm3,%xmm6,%xmm7
+	vpunpcklqdq %xmm3,%xmm6,%xmm3
+	vpmuludq %xmm3,%xmm10,%xmm6
+	vpmuludq %xmm5,%xmm12,%xmm14
+	vmovdqa %xmm5,256(%rsp)
+	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm5,%xmm5
+	vmovdqa %xmm5,272(%rsp)
+	vpaddq %xmm14,%xmm6,%xmm6
+	vpmuludq %xmm7,%xmm10,%xmm5
+	vmovdqa %xmm7,288(%rsp)
+	vpaddq %xmm7,%xmm7,%xmm7
+	vpmuludq %xmm3,%xmm12,%xmm14
+	vmovdqa %xmm3,304(%rsp)
+	vpaddq %xmm14,%xmm5,%xmm5
+	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm3,%xmm3
+	vmovdqa %xmm3,320(%rsp)
+	vpaddq %xmm9,%xmm8,%xmm3
+	vpaddq curve25519_sandy2x_subc2(%rip),%xmm8,%xmm8
+	vpsubq %xmm9,%xmm8,%xmm8
+	vpunpckhqdq %xmm3,%xmm8,%xmm9
+	vpunpcklqdq %xmm3,%xmm8,%xmm3
+	vmovdqa %xmm3,336(%rsp)
+	vpmuludq %xmm7,%xmm12,%xmm8
+	vmovdqa %xmm7,352(%rsp)
+	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm7,%xmm7
+	vmovdqa %xmm7,368(%rsp)
+	vpmuludq %xmm3,%xmm10,%xmm7
+	vpaddq %xmm7,%xmm8,%xmm8
+	vpmuludq %xmm9,%xmm10,%xmm7
+	vmovdqa %xmm9,384(%rsp)
+	vpaddq %xmm9,%xmm9,%xmm9
+	vpmuludq %xmm3,%xmm12,%xmm10
+	vpaddq %xmm10,%xmm7,%xmm7
+	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm3,%xmm3
+	vmovdqa %xmm3,400(%rsp)
+	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm12,%xmm12
+	vpmuludq %xmm9,%xmm12,%xmm3
+	vmovdqa %xmm9,416(%rsp)
+	vpaddq %xmm3,%xmm11,%xmm11
+	vmovdqa 0(%rsp),%xmm3
+	vmovdqa 16(%rsp),%xmm9
+	vpaddq curve25519_sandy2x_subc2(%rip),%xmm3,%xmm10
+	vpsubq %xmm9,%xmm10,%xmm10
+	vpaddq %xmm9,%xmm3,%xmm3
+	vpunpckhqdq %xmm10,%xmm3,%xmm9
+	vpunpcklqdq %xmm10,%xmm3,%xmm3
+	vpmuludq 144(%rsp),%xmm3,%xmm10
+	vpaddq %xmm10,%xmm0,%xmm0
+	vpmuludq 128(%rsp),%xmm3,%xmm10
+	vpaddq %xmm10,%xmm2,%xmm2
+	vpmuludq 192(%rsp),%xmm3,%xmm10
+	vpaddq %xmm10,%xmm1,%xmm1
+	vpmuludq 176(%rsp),%xmm3,%xmm10
+	vpaddq %xmm10,%xmm4,%xmm4
+	vpmuludq 240(%rsp),%xmm3,%xmm10
+	vpaddq %xmm10,%xmm6,%xmm6
+	vpmuludq 224(%rsp),%xmm3,%xmm10
+	vpaddq %xmm10,%xmm5,%xmm5
+	vpmuludq 304(%rsp),%xmm3,%xmm10
+	vpaddq %xmm10,%xmm8,%xmm8
+	vpmuludq 288(%rsp),%xmm3,%xmm10
+	vpaddq %xmm10,%xmm7,%xmm7
+	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm3,%xmm3
+	vpmuludq 336(%rsp),%xmm3,%xmm10
+	vpaddq %xmm10,%xmm11,%xmm11
+	vpmuludq 384(%rsp),%xmm3,%xmm3
+	vpaddq %xmm3,%xmm13,%xmm13
+	vpmuludq 144(%rsp),%xmm9,%xmm3
+	vpaddq %xmm3,%xmm2,%xmm2
+	vpmuludq 160(%rsp),%xmm9,%xmm3
+	vpaddq %xmm3,%xmm1,%xmm1
+	vpmuludq 192(%rsp),%xmm9,%xmm3
+	vpaddq %xmm3,%xmm4,%xmm4
+	vpmuludq 208(%rsp),%xmm9,%xmm3
+	vpaddq %xmm3,%xmm6,%xmm6
+	vpmuludq 240(%rsp),%xmm9,%xmm3
+	vpaddq %xmm3,%xmm5,%xmm5
+	vpmuludq 256(%rsp),%xmm9,%xmm3
+	vpaddq %xmm3,%xmm8,%xmm8
+	vpmuludq 304(%rsp),%xmm9,%xmm3
+	vpaddq %xmm3,%xmm7,%xmm7
+	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm9,%xmm9
+	vpmuludq 352(%rsp),%xmm9,%xmm3
+	vpaddq %xmm3,%xmm11,%xmm11
+	vpmuludq 336(%rsp),%xmm9,%xmm3
+	vpaddq %xmm3,%xmm13,%xmm13
+	vpmuludq 416(%rsp),%xmm9,%xmm9
+	vpaddq %xmm9,%xmm0,%xmm0
+	vmovdqa 32(%rsp),%xmm3
+	vmovdqa 80(%rsp),%xmm9
+	vpaddq curve25519_sandy2x_subc2(%rip),%xmm3,%xmm10
+	vpsubq %xmm9,%xmm10,%xmm10
+	vpaddq %xmm9,%xmm3,%xmm3
+	vpunpckhqdq %xmm10,%xmm3,%xmm9
+	vpunpcklqdq %xmm10,%xmm3,%xmm3
+	vpmuludq 144(%rsp),%xmm3,%xmm10
+	vpaddq %xmm10,%xmm1,%xmm1
+	vpmuludq 128(%rsp),%xmm3,%xmm10
+	vpaddq %xmm10,%xmm4,%xmm4
+	vpmuludq 192(%rsp),%xmm3,%xmm10
+	vpaddq %xmm10,%xmm6,%xmm6
+	vpmuludq 176(%rsp),%xmm3,%xmm10
+	vpaddq %xmm10,%xmm5,%xmm5
+	vpmuludq 240(%rsp),%xmm3,%xmm10
+	vpaddq %xmm10,%xmm8,%xmm8
+	vpmuludq 224(%rsp),%xmm3,%xmm10
+	vpaddq %xmm10,%xmm7,%xmm7
+	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm3,%xmm3
+	vpmuludq 304(%rsp),%xmm3,%xmm10
+	vpaddq %xmm10,%xmm11,%xmm11
+	vpmuludq 288(%rsp),%xmm3,%xmm10
+	vpaddq %xmm10,%xmm13,%xmm13
+	vpmuludq 336(%rsp),%xmm3,%xmm10
+	vpaddq %xmm10,%xmm0,%xmm0
+	vpmuludq 384(%rsp),%xmm3,%xmm3
+	vpaddq %xmm3,%xmm2,%xmm2
+	vpmuludq 144(%rsp),%xmm9,%xmm3
+	vpaddq %xmm3,%xmm4,%xmm4
+	vpmuludq 160(%rsp),%xmm9,%xmm3
+	vpaddq %xmm3,%xmm6,%xmm6
+	vpmuludq 192(%rsp),%xmm9,%xmm3
+	vpaddq %xmm3,%xmm5,%xmm5
+	vpmuludq 208(%rsp),%xmm9,%xmm3
+	vpaddq %xmm3,%xmm8,%xmm8
+	vpmuludq 240(%rsp),%xmm9,%xmm3
+	vpaddq %xmm3,%xmm7,%xmm7
+	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm9,%xmm9
+	vpmuludq 256(%rsp),%xmm9,%xmm3
+	vpaddq %xmm3,%xmm11,%xmm11
+	vpmuludq 304(%rsp),%xmm9,%xmm3
+	vpaddq %xmm3,%xmm13,%xmm13
+	vpmuludq 352(%rsp),%xmm9,%xmm3
+	vpaddq %xmm3,%xmm0,%xmm0
+	vpmuludq 336(%rsp),%xmm9,%xmm3
+	vpaddq %xmm3,%xmm2,%xmm2
+	vpmuludq 416(%rsp),%xmm9,%xmm9
+	vpaddq %xmm9,%xmm1,%xmm1
+	vmovdqa 48(%rsp),%xmm3
+	vmovdqa 96(%rsp),%xmm9
+	vpaddq curve25519_sandy2x_subc2(%rip),%xmm3,%xmm10
+	vpsubq %xmm9,%xmm10,%xmm10
+	vpaddq %xmm9,%xmm3,%xmm3
+	vpunpckhqdq %xmm10,%xmm3,%xmm9
+	vpunpcklqdq %xmm10,%xmm3,%xmm3
+	vpmuludq 144(%rsp),%xmm3,%xmm10
+	vpaddq %xmm10,%xmm6,%xmm6
+	vpmuludq 128(%rsp),%xmm3,%xmm10
+	vpaddq %xmm10,%xmm5,%xmm5
+	vpmuludq 192(%rsp),%xmm3,%xmm10
+	vpaddq %xmm10,%xmm8,%xmm8
+	vpmuludq 176(%rsp),%xmm3,%xmm10
+	vpaddq %xmm10,%xmm7,%xmm7
+	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm3,%xmm3
+	vpmuludq 240(%rsp),%xmm3,%xmm10
+	vpaddq %xmm10,%xmm11,%xmm11
+	vpmuludq 224(%rsp),%xmm3,%xmm10
+	vpaddq %xmm10,%xmm13,%xmm13
+	vpmuludq 304(%rsp),%xmm3,%xmm10
+	vpaddq %xmm10,%xmm0,%xmm0
+	vpmuludq 288(%rsp),%xmm3,%xmm10
+	vpaddq %xmm10,%xmm2,%xmm2
+	vpmuludq 336(%rsp),%xmm3,%xmm10
+	vpaddq %xmm10,%xmm1,%xmm1
+	vpmuludq 384(%rsp),%xmm3,%xmm3
+	vpaddq %xmm3,%xmm4,%xmm4
+	vpmuludq 144(%rsp),%xmm9,%xmm3
+	vpaddq %xmm3,%xmm5,%xmm5
+	vpmuludq 160(%rsp),%xmm9,%xmm3
+	vpaddq %xmm3,%xmm8,%xmm8
+	vpmuludq 192(%rsp),%xmm9,%xmm3
+	vpaddq %xmm3,%xmm7,%xmm7
+	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm9,%xmm9
+	vpmuludq 208(%rsp),%xmm9,%xmm3
+	vpaddq %xmm3,%xmm11,%xmm11
+	vpmuludq 240(%rsp),%xmm9,%xmm3
+	vpaddq %xmm3,%xmm13,%xmm13
+	vpmuludq 256(%rsp),%xmm9,%xmm3
+	vpaddq %xmm3,%xmm0,%xmm0
+	vpmuludq 304(%rsp),%xmm9,%xmm3
+	vpaddq %xmm3,%xmm2,%xmm2
+	vpmuludq 352(%rsp),%xmm9,%xmm3
+	vpaddq %xmm3,%xmm1,%xmm1
+	vpmuludq 336(%rsp),%xmm9,%xmm3
+	vpaddq %xmm3,%xmm4,%xmm4
+	vpmuludq 416(%rsp),%xmm9,%xmm9
+	vpaddq %xmm9,%xmm6,%xmm6
+	vmovdqa 64(%rsp),%xmm3
+	vmovdqa 112(%rsp),%xmm9
+	vpaddq curve25519_sandy2x_subc2(%rip),%xmm3,%xmm10
+	vpsubq %xmm9,%xmm10,%xmm10
+	vpaddq %xmm9,%xmm3,%xmm3
+	vpunpckhqdq %xmm10,%xmm3,%xmm9
+	vpunpcklqdq %xmm10,%xmm3,%xmm3
+	vpmuludq 144(%rsp),%xmm3,%xmm10
+	vpaddq %xmm10,%xmm8,%xmm8
+	vpmuludq 128(%rsp),%xmm3,%xmm10
+	vpaddq %xmm10,%xmm7,%xmm7
+	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm3,%xmm3
+	vpmuludq 192(%rsp),%xmm3,%xmm10
+	vpaddq %xmm10,%xmm11,%xmm11
+	vpmuludq 176(%rsp),%xmm3,%xmm10
+	vpaddq %xmm10,%xmm13,%xmm13
+	vpmuludq 240(%rsp),%xmm3,%xmm10
+	vpaddq %xmm10,%xmm0,%xmm0
+	vpmuludq 224(%rsp),%xmm3,%xmm10
+	vpaddq %xmm10,%xmm2,%xmm2
+	vpmuludq 304(%rsp),%xmm3,%xmm10
+	vpaddq %xmm10,%xmm1,%xmm1
+	vpmuludq 288(%rsp),%xmm3,%xmm10
+	vpaddq %xmm10,%xmm4,%xmm4
+	vpmuludq 336(%rsp),%xmm3,%xmm10
+	vpaddq %xmm10,%xmm6,%xmm6
+	vpmuludq 384(%rsp),%xmm3,%xmm3
+	vpaddq %xmm3,%xmm5,%xmm5
+	vpmuludq 144(%rsp),%xmm9,%xmm3
+	vpaddq %xmm3,%xmm7,%xmm7
+	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm9,%xmm9
+	vpmuludq 160(%rsp),%xmm9,%xmm3
+	vpaddq %xmm3,%xmm11,%xmm11
+	vpmuludq 192(%rsp),%xmm9,%xmm3
+	vpaddq %xmm3,%xmm13,%xmm13
+	vpmuludq 208(%rsp),%xmm9,%xmm3
+	vpaddq %xmm3,%xmm0,%xmm0
+	vpmuludq 240(%rsp),%xmm9,%xmm3
+	vpaddq %xmm3,%xmm2,%xmm2
+	vpmuludq 256(%rsp),%xmm9,%xmm3
+	vpaddq %xmm3,%xmm1,%xmm1
+	vpmuludq 304(%rsp),%xmm9,%xmm3
+	vpaddq %xmm3,%xmm4,%xmm4
+	vpmuludq 352(%rsp),%xmm9,%xmm3
+	vpaddq %xmm3,%xmm6,%xmm6
+	vpmuludq 336(%rsp),%xmm9,%xmm3
+	vpaddq %xmm3,%xmm5,%xmm5
+	vpmuludq 416(%rsp),%xmm9,%xmm9
+	vpaddq %xmm9,%xmm8,%xmm8
+	vpsrlq $25,%xmm4,%xmm3
+	vpaddq %xmm3,%xmm6,%xmm6
+	vpand curve25519_sandy2x_m25(%rip),%xmm4,%xmm4
+	vpsrlq $26,%xmm11,%xmm3
+	vpaddq %xmm3,%xmm13,%xmm13
+	vpand curve25519_sandy2x_m26(%rip),%xmm11,%xmm11
+	vpsrlq $26,%xmm6,%xmm3
+	vpaddq %xmm3,%xmm5,%xmm5
+	vpand curve25519_sandy2x_m26(%rip),%xmm6,%xmm6
+	vpsrlq $25,%xmm13,%xmm3
+	vpaddq %xmm3,%xmm0,%xmm0
+	vpand curve25519_sandy2x_m25(%rip),%xmm13,%xmm13
+	vpsrlq $25,%xmm5,%xmm3
+	vpaddq %xmm3,%xmm8,%xmm8
+	vpand curve25519_sandy2x_m25(%rip),%xmm5,%xmm5
+	vpsrlq $26,%xmm0,%xmm3
+	vpaddq %xmm3,%xmm2,%xmm2
+	vpand curve25519_sandy2x_m26(%rip),%xmm0,%xmm0
+	vpsrlq $26,%xmm8,%xmm3
+	vpaddq %xmm3,%xmm7,%xmm7
+	vpand curve25519_sandy2x_m26(%rip),%xmm8,%xmm8
+	vpsrlq $25,%xmm2,%xmm3
+	vpaddq %xmm3,%xmm1,%xmm1
+	vpand curve25519_sandy2x_m25(%rip),%xmm2,%xmm2
+	vpsrlq $25,%xmm7,%xmm3
+	vpsllq $4,%xmm3,%xmm9
+	vpaddq %xmm3,%xmm11,%xmm11
+	vpsllq $1,%xmm3,%xmm3
+	vpaddq %xmm3,%xmm9,%xmm9
+	vpaddq %xmm9,%xmm11,%xmm11
+	vpand curve25519_sandy2x_m25(%rip),%xmm7,%xmm7
+	vpsrlq $26,%xmm1,%xmm3
+	vpaddq %xmm3,%xmm4,%xmm4
+	vpand curve25519_sandy2x_m26(%rip),%xmm1,%xmm1
+	vpsrlq $26,%xmm11,%xmm3
+	vpaddq %xmm3,%xmm13,%xmm13
+	vpand curve25519_sandy2x_m26(%rip),%xmm11,%xmm11
+	vpsrlq $25,%xmm4,%xmm3
+	vpaddq %xmm3,%xmm6,%xmm6
+	vpand curve25519_sandy2x_m25(%rip),%xmm4,%xmm4
+	vpunpcklqdq %xmm13,%xmm11,%xmm3
+	vpunpckhqdq %xmm13,%xmm11,%xmm9
+	vpaddq curve25519_sandy2x_subc0(%rip),%xmm9,%xmm10
+	vpsubq %xmm3,%xmm10,%xmm10
+	vpaddq %xmm9,%xmm3,%xmm3
+	vpunpckhqdq %xmm3,%xmm10,%xmm9
+	vpunpcklqdq %xmm3,%xmm10,%xmm10
+	vpmuludq %xmm10,%xmm10,%xmm3
+	vpaddq %xmm10,%xmm10,%xmm10
+	vpmuludq %xmm9,%xmm10,%xmm11
+	vpunpcklqdq %xmm2,%xmm0,%xmm12
+	vpunpckhqdq %xmm2,%xmm0,%xmm0
+	vpaddq curve25519_sandy2x_subc2(%rip),%xmm0,%xmm2
+	vpsubq %xmm12,%xmm2,%xmm2
+	vpaddq %xmm0,%xmm12,%xmm12
+	vpunpckhqdq %xmm12,%xmm2,%xmm0
+	vpunpcklqdq %xmm12,%xmm2,%xmm2
+	vpmuludq %xmm2,%xmm10,%xmm12
+	vpaddq %xmm9,%xmm9,%xmm13
+	vpmuludq %xmm13,%xmm9,%xmm9
+	vpaddq %xmm9,%xmm12,%xmm12
+	vpmuludq %xmm0,%xmm10,%xmm9
+	vpmuludq %xmm2,%xmm13,%xmm14
+	vpaddq %xmm14,%xmm9,%xmm9
+	vpunpcklqdq %xmm4,%xmm1,%xmm14
+	vpunpckhqdq %xmm4,%xmm1,%xmm1
+	vpaddq curve25519_sandy2x_subc2(%rip),%xmm1,%xmm4
+	vpsubq %xmm14,%xmm4,%xmm4
+	vpaddq %xmm1,%xmm14,%xmm14
+	vpunpckhqdq %xmm14,%xmm4,%xmm1
+	vpunpcklqdq %xmm14,%xmm4,%xmm4
+	vmovdqa %xmm1,0(%rsp)
+	vpaddq %xmm1,%xmm1,%xmm1
+	vmovdqa %xmm1,16(%rsp)
+	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm1,%xmm1
+	vmovdqa %xmm1,32(%rsp)
+	vpmuludq %xmm4,%xmm10,%xmm1
+	vpmuludq %xmm2,%xmm2,%xmm14
+	vpaddq %xmm14,%xmm1,%xmm1
+	vpmuludq 0(%rsp),%xmm10,%xmm14
+	vpmuludq %xmm4,%xmm13,%xmm15
+	vpaddq %xmm15,%xmm14,%xmm14
+	vpunpcklqdq %xmm5,%xmm6,%xmm15
+	vpunpckhqdq %xmm5,%xmm6,%xmm5
+	vpaddq curve25519_sandy2x_subc2(%rip),%xmm5,%xmm6
+	vpsubq %xmm15,%xmm6,%xmm6
+	vpaddq %xmm5,%xmm15,%xmm15
+	vpunpckhqdq %xmm15,%xmm6,%xmm5
+	vpunpcklqdq %xmm15,%xmm6,%xmm6
+	vmovdqa %xmm6,48(%rsp)
+	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm6,%xmm6
+	vmovdqa %xmm6,64(%rsp)
+	vmovdqa %xmm5,80(%rsp)
+	vpmuludq curve25519_sandy2x_v38_38(%rip),%xmm5,%xmm5
+	vmovdqa %xmm5,96(%rsp)
+	vpmuludq 48(%rsp),%xmm10,%xmm5
+	vpaddq %xmm0,%xmm0,%xmm6
+	vpmuludq %xmm6,%xmm0,%xmm0
+	vpaddq %xmm0,%xmm5,%xmm5
+	vpmuludq 80(%rsp),%xmm10,%xmm0
+	vpmuludq %xmm4,%xmm6,%xmm15
+	vpaddq %xmm15,%xmm0,%xmm0
+	vpmuludq %xmm6,%xmm13,%xmm15
+	vpaddq %xmm15,%xmm1,%xmm1
+	vpmuludq %xmm6,%xmm2,%xmm15
+	vpaddq %xmm15,%xmm14,%xmm14
+	vpunpcklqdq %xmm7,%xmm8,%xmm15
+	vpunpckhqdq %xmm7,%xmm8,%xmm7
+	vpaddq curve25519_sandy2x_subc2(%rip),%xmm7,%xmm8
+	vpsubq %xmm15,%xmm8,%xmm8
+	vpaddq %xmm7,%xmm15,%xmm15
+	vpunpckhqdq %xmm15,%xmm8,%xmm7
+	vpunpcklqdq %xmm15,%xmm8,%xmm8
+	vmovdqa %xmm8,112(%rsp)
+	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm8,%xmm8
+	vmovdqa %xmm8,160(%rsp)
+	vpmuludq 112(%rsp),%xmm10,%xmm8
+	vpmuludq %xmm7,%xmm10,%xmm10
+	vpmuludq curve25519_sandy2x_v38_38(%rip),%xmm7,%xmm15
+	vpmuludq %xmm15,%xmm7,%xmm7
+	vpaddq %xmm7,%xmm8,%xmm8
+	vpmuludq %xmm15,%xmm13,%xmm7
+	vpaddq %xmm7,%xmm3,%xmm3
+	vpmuludq %xmm15,%xmm2,%xmm7
+	vpaddq %xmm7,%xmm11,%xmm11
+	vpmuludq 80(%rsp),%xmm13,%xmm7
+	vpaddq %xmm7,%xmm7,%xmm7
+	vpaddq %xmm7,%xmm8,%xmm8
+	vpmuludq 16(%rsp),%xmm13,%xmm7
+	vpaddq %xmm7,%xmm5,%xmm5
+	vpmuludq 48(%rsp),%xmm13,%xmm7
+	vpaddq %xmm7,%xmm0,%xmm0
+	vpmuludq 112(%rsp),%xmm13,%xmm7
+	vpaddq %xmm7,%xmm10,%xmm10
+	vpmuludq %xmm15,%xmm6,%xmm7
+	vpaddq %xmm7,%xmm12,%xmm12
+	vpmuludq %xmm15,%xmm4,%xmm7
+	vpaddq %xmm7,%xmm9,%xmm9
+	vpaddq %xmm2,%xmm2,%xmm2
+	vpmuludq %xmm4,%xmm2,%xmm7
+	vpaddq %xmm7,%xmm5,%xmm5
+	vpmuludq 160(%rsp),%xmm2,%xmm7
+	vpaddq %xmm7,%xmm3,%xmm3
+	vpmuludq 160(%rsp),%xmm6,%xmm7
+	vpaddq %xmm7,%xmm11,%xmm11
+	vpmuludq 0(%rsp),%xmm2,%xmm7
+	vpaddq %xmm7,%xmm0,%xmm0
+	vpmuludq 48(%rsp),%xmm2,%xmm7
+	vpaddq %xmm7,%xmm8,%xmm8
+	vpmuludq 80(%rsp),%xmm2,%xmm2
+	vpaddq %xmm2,%xmm10,%xmm10
+	vpmuludq 96(%rsp),%xmm4,%xmm2
+	vpaddq %xmm2,%xmm11,%xmm11
+	vpmuludq %xmm4,%xmm4,%xmm2
+	vpaddq %xmm2,%xmm8,%xmm8
+	vpaddq %xmm4,%xmm4,%xmm2
+	vpmuludq 160(%rsp),%xmm2,%xmm4
+	vpaddq %xmm4,%xmm12,%xmm12
+	vpmuludq 16(%rsp),%xmm15,%xmm4
+	vpaddq %xmm4,%xmm1,%xmm1
+	vpmuludq 48(%rsp),%xmm15,%xmm4
+	vpaddq %xmm4,%xmm14,%xmm14
+	vpmuludq 96(%rsp),%xmm6,%xmm4
+	vpaddq %xmm4,%xmm3,%xmm3
+	vmovdqa 16(%rsp),%xmm4
+	vpmuludq 160(%rsp),%xmm4,%xmm4
+	vpaddq %xmm4,%xmm9,%xmm9
+	vpmuludq 16(%rsp),%xmm6,%xmm4
+	vpaddq %xmm4,%xmm8,%xmm8
+	vpmuludq 48(%rsp),%xmm6,%xmm4
+	vpaddq %xmm4,%xmm10,%xmm10
+	vpmuludq 80(%rsp),%xmm15,%xmm4
+	vpaddq %xmm4,%xmm4,%xmm4
+	vpaddq %xmm4,%xmm5,%xmm5
+	vpmuludq 112(%rsp),%xmm15,%xmm4
+	vpaddq %xmm4,%xmm0,%xmm0
+	vmovdqa 48(%rsp),%xmm4
+	vpaddq %xmm4,%xmm4,%xmm4
+	vpmuludq 160(%rsp),%xmm4,%xmm4
+	vpaddq %xmm4,%xmm1,%xmm1
+	vmovdqa 80(%rsp),%xmm4
+	vpaddq %xmm4,%xmm4,%xmm4
+	vpmuludq 160(%rsp),%xmm4,%xmm4
+	vpaddq %xmm4,%xmm14,%xmm14
+	vpmuludq 64(%rsp),%xmm2,%xmm4
+	vpaddq %xmm4,%xmm3,%xmm3
+	vmovdqa 16(%rsp),%xmm4
+	vpmuludq 64(%rsp),%xmm4,%xmm4
+	vpaddq %xmm4,%xmm11,%xmm11
+	vmovdqa 16(%rsp),%xmm4
+	vpmuludq 96(%rsp),%xmm4,%xmm4
+	vpaddq %xmm4,%xmm12,%xmm12
+	vmovdqa 48(%rsp),%xmm4
+	vpmuludq 96(%rsp),%xmm4,%xmm4
+	vpaddq %xmm4,%xmm9,%xmm9
+	vpmuludq 0(%rsp),%xmm2,%xmm2
+	vpaddq %xmm2,%xmm10,%xmm10
+	vmovdqa 32(%rsp),%xmm2
+	vpmuludq 0(%rsp),%xmm2,%xmm2
+	vpaddq %xmm2,%xmm3,%xmm3
+	vmovdqa 64(%rsp),%xmm2
+	vpmuludq 48(%rsp),%xmm2,%xmm2
+	vpaddq %xmm2,%xmm12,%xmm12
+	vmovdqa 96(%rsp),%xmm2
+	vpmuludq 80(%rsp),%xmm2,%xmm2
+	vpaddq %xmm2,%xmm1,%xmm1
+	vmovdqa 160(%rsp),%xmm2
+	vpmuludq 112(%rsp),%xmm2,%xmm2
+	vpaddq %xmm2,%xmm5,%xmm5
+	vpsrlq $26,%xmm3,%xmm2
+	vpaddq %xmm2,%xmm11,%xmm11
+	vpand curve25519_sandy2x_m26(%rip),%xmm3,%xmm3
+	vpsrlq $25,%xmm14,%xmm2
+	vpaddq %xmm2,%xmm5,%xmm5
+	vpand curve25519_sandy2x_m25(%rip),%xmm14,%xmm14
+	vpsrlq $25,%xmm11,%xmm2
+	vpaddq %xmm2,%xmm12,%xmm12
+	vpand curve25519_sandy2x_m25(%rip),%xmm11,%xmm11
+	vpsrlq $26,%xmm5,%xmm2
+	vpaddq %xmm2,%xmm0,%xmm0
+	vpand curve25519_sandy2x_m26(%rip),%xmm5,%xmm5
+	vpsrlq $26,%xmm12,%xmm2
+	vpaddq %xmm2,%xmm9,%xmm9
+	vpand curve25519_sandy2x_m26(%rip),%xmm12,%xmm12
+	vpsrlq $25,%xmm0,%xmm2
+	vpaddq %xmm2,%xmm8,%xmm8
+	vpand curve25519_sandy2x_m25(%rip),%xmm0,%xmm0
+	vpsrlq $25,%xmm9,%xmm2
+	vpaddq %xmm2,%xmm1,%xmm1
+	vpand curve25519_sandy2x_m25(%rip),%xmm9,%xmm9
+	vpsrlq $26,%xmm8,%xmm2
+	vpaddq %xmm2,%xmm10,%xmm10
+	vpand curve25519_sandy2x_m26(%rip),%xmm8,%xmm8
+	vpsrlq $26,%xmm1,%xmm2
+	vpaddq %xmm2,%xmm14,%xmm14
+	vpand curve25519_sandy2x_m26(%rip),%xmm1,%xmm1
+	vpsrlq $25,%xmm10,%xmm2
+	vpsllq $4,%xmm2,%xmm4
+	vpaddq %xmm2,%xmm3,%xmm3
+	vpsllq $1,%xmm2,%xmm2
+	vpaddq %xmm2,%xmm4,%xmm4
+	vpaddq %xmm4,%xmm3,%xmm3
+	vpand curve25519_sandy2x_m25(%rip),%xmm10,%xmm10
+	vpsrlq $25,%xmm14,%xmm2
+	vpaddq %xmm2,%xmm5,%xmm5
+	vpand curve25519_sandy2x_m25(%rip),%xmm14,%xmm14
+	vpsrlq $26,%xmm3,%xmm2
+	vpaddq %xmm2,%xmm11,%xmm11
+	vpand curve25519_sandy2x_m26(%rip),%xmm3,%xmm3
+	vpunpckhqdq %xmm11,%xmm3,%xmm2
+	vmovdqa %xmm2,0(%rsp)
+	vpunpcklqdq %xmm11,%xmm3,%xmm2
+	vpmuludq curve25519_sandy2x_v9_9(%rip),%xmm2,%xmm2
+	vmovdqa %xmm2,80(%rsp)
+	vpunpckhqdq %xmm9,%xmm12,%xmm2
+	vmovdqa %xmm2,16(%rsp)
+	vpunpcklqdq %xmm9,%xmm12,%xmm2
+	vpmuludq curve25519_sandy2x_v9_9(%rip),%xmm2,%xmm2
+	vmovdqa %xmm2,96(%rsp)
+	vpunpckhqdq %xmm14,%xmm1,%xmm2
+	vmovdqa %xmm2,32(%rsp)
+	vpunpcklqdq %xmm14,%xmm1,%xmm1
+	vpmuludq curve25519_sandy2x_v9_9(%rip),%xmm1,%xmm1
+	vmovdqa %xmm1,112(%rsp)
+	vpunpckhqdq %xmm0,%xmm5,%xmm1
+	vmovdqa %xmm1,48(%rsp)
+	vpunpcklqdq %xmm0,%xmm5,%xmm0
+	vpmuludq curve25519_sandy2x_v9_9(%rip),%xmm0,%xmm0
+	vmovdqa %xmm0,160(%rsp)
+	vpunpckhqdq %xmm10,%xmm8,%xmm0
+	vmovdqa %xmm0,64(%rsp)
+	vpunpcklqdq %xmm10,%xmm8,%xmm0
+	vpmuludq curve25519_sandy2x_v9_9(%rip),%xmm0,%xmm0
+	vmovdqa %xmm0,208(%rsp)
+	vmovdqa 144(%rsp),%xmm0
+	vpmuludq %xmm0,%xmm0,%xmm1
+	vpaddq %xmm0,%xmm0,%xmm0
+	vmovdqa 128(%rsp),%xmm2
+	vpmuludq %xmm2,%xmm0,%xmm3
+	vmovdqa 192(%rsp),%xmm4
+	vpmuludq %xmm4,%xmm0,%xmm5
+	vmovdqa 176(%rsp),%xmm6
+	vpmuludq %xmm6,%xmm0,%xmm7
+	vmovdqa 240(%rsp),%xmm8
+	vpmuludq %xmm8,%xmm0,%xmm9
+	vpmuludq 224(%rsp),%xmm0,%xmm10
+	vpmuludq 304(%rsp),%xmm0,%xmm11
+	vpmuludq 288(%rsp),%xmm0,%xmm12
+	vpmuludq 336(%rsp),%xmm0,%xmm13
+	vmovdqa 384(%rsp),%xmm14
+	vpmuludq %xmm14,%xmm0,%xmm0
+	vpmuludq curve25519_sandy2x_v38_38(%rip),%xmm14,%xmm15
+	vpmuludq %xmm15,%xmm14,%xmm14
+	vpaddq %xmm14,%xmm13,%xmm13
+	vpaddq %xmm6,%xmm6,%xmm14
+	vpmuludq %xmm14,%xmm6,%xmm6
+	vpaddq %xmm6,%xmm11,%xmm11
+	vpaddq %xmm2,%xmm2,%xmm6
+	vpmuludq %xmm6,%xmm2,%xmm2
+	vpaddq %xmm2,%xmm5,%xmm5
+	vpmuludq %xmm15,%xmm6,%xmm2
+	vpaddq %xmm2,%xmm1,%xmm1
+	vpmuludq %xmm15,%xmm4,%xmm2
+	vpaddq %xmm2,%xmm3,%xmm3
+	vpmuludq 256(%rsp),%xmm6,%xmm2
+	vpaddq %xmm2,%xmm11,%xmm11
+	vpmuludq 304(%rsp),%xmm6,%xmm2
+	vpaddq %xmm2,%xmm12,%xmm12
+	vpmuludq 352(%rsp),%xmm6,%xmm2
+	vpaddq %xmm2,%xmm13,%xmm13
+	vpmuludq 336(%rsp),%xmm6,%xmm2
+	vpaddq %xmm2,%xmm0,%xmm0
+	vpmuludq %xmm4,%xmm6,%xmm2
+	vpaddq %xmm2,%xmm7,%xmm7
+	vpmuludq %xmm14,%xmm6,%xmm2
+	vpaddq %xmm2,%xmm9,%xmm9
+	vpmuludq %xmm8,%xmm6,%xmm2
+	vpaddq %xmm2,%xmm10,%xmm10
+	vpmuludq %xmm15,%xmm14,%xmm2
+	vpaddq %xmm2,%xmm5,%xmm5
+	vpmuludq %xmm15,%xmm8,%xmm2
+	vpaddq %xmm2,%xmm7,%xmm7
+	vpmuludq %xmm4,%xmm4,%xmm2
+	vpaddq %xmm2,%xmm9,%xmm9
+	vpmuludq %xmm14,%xmm4,%xmm2
+	vpaddq %xmm2,%xmm10,%xmm10
+	vpaddq %xmm4,%xmm4,%xmm2
+	vpmuludq %xmm8,%xmm2,%xmm4
+	vpaddq %xmm4,%xmm11,%xmm11
+	vpmuludq 400(%rsp),%xmm2,%xmm4
+	vpaddq %xmm4,%xmm1,%xmm1
+	vpmuludq 400(%rsp),%xmm14,%xmm4
+	vpaddq %xmm4,%xmm3,%xmm3
+	vpmuludq 224(%rsp),%xmm2,%xmm4
+	vpaddq %xmm4,%xmm12,%xmm12
+	vpmuludq 304(%rsp),%xmm2,%xmm4
+	vpaddq %xmm4,%xmm13,%xmm13
+	vpmuludq 288(%rsp),%xmm2,%xmm2
+	vpaddq %xmm2,%xmm0,%xmm0
+	vpmuludq 368(%rsp),%xmm8,%xmm2
+	vpaddq %xmm2,%xmm3,%xmm3
+	vpmuludq %xmm8,%xmm14,%xmm2
+	vpaddq %xmm2,%xmm12,%xmm12
+	vpmuludq %xmm8,%xmm8,%xmm2
+	vpaddq %xmm2,%xmm13,%xmm13
+	vpaddq %xmm8,%xmm8,%xmm2
+	vpmuludq 400(%rsp),%xmm2,%xmm4
+	vpaddq %xmm4,%xmm5,%xmm5
+	vpmuludq 256(%rsp),%xmm15,%xmm4
+	vpaddq %xmm4,%xmm9,%xmm9
+	vpmuludq 304(%rsp),%xmm15,%xmm4
+	vpaddq %xmm4,%xmm10,%xmm10
+	vpmuludq 368(%rsp),%xmm14,%xmm4
+	vpaddq %xmm4,%xmm1,%xmm1
+	vmovdqa 256(%rsp),%xmm4
+	vpmuludq 400(%rsp),%xmm4,%xmm4
+	vpaddq %xmm4,%xmm7,%xmm7
+	vpmuludq 256(%rsp),%xmm14,%xmm4
+	vpaddq %xmm4,%xmm13,%xmm13
+	vpmuludq 304(%rsp),%xmm14,%xmm4
+	vpaddq %xmm4,%xmm0,%xmm0
+	vpmuludq 352(%rsp),%xmm15,%xmm4
+	vpaddq %xmm4,%xmm11,%xmm11
+	vpmuludq 336(%rsp),%xmm15,%xmm4
+	vpaddq %xmm4,%xmm12,%xmm12
+	vmovdqa 304(%rsp),%xmm4
+	vpaddq %xmm4,%xmm4,%xmm4
+	vpmuludq 400(%rsp),%xmm4,%xmm4
+	vpaddq %xmm4,%xmm9,%xmm9
+	vpmuludq 320(%rsp),%xmm2,%xmm4
+	vpaddq %xmm4,%xmm1,%xmm1
+	vmovdqa 256(%rsp),%xmm4
+	vpmuludq 320(%rsp),%xmm4,%xmm4
+	vpaddq %xmm4,%xmm3,%xmm3
+	vmovdqa 256(%rsp),%xmm4
+	vpmuludq 368(%rsp),%xmm4,%xmm4
+	vpaddq %xmm4,%xmm5,%xmm5
+	vmovdqa 304(%rsp),%xmm4
+	vpmuludq 368(%rsp),%xmm4,%xmm4
+	vpaddq %xmm4,%xmm7,%xmm7
+	vmovdqa 352(%rsp),%xmm4
+	vpmuludq 400(%rsp),%xmm4,%xmm4
+	vpaddq %xmm4,%xmm10,%xmm10
+	vpmuludq 224(%rsp),%xmm2,%xmm2
+	vpaddq %xmm2,%xmm0,%xmm0
+	vmovdqa 272(%rsp),%xmm2
+	vpmuludq 224(%rsp),%xmm2,%xmm2
+	vpaddq %xmm2,%xmm1,%xmm1
+	vmovdqa 320(%rsp),%xmm2
+	vpmuludq 304(%rsp),%xmm2,%xmm2
+	vpaddq %xmm2,%xmm5,%xmm5
+	vmovdqa 368(%rsp),%xmm2
+	vpmuludq 288(%rsp),%xmm2,%xmm2
+	vpaddq %xmm2,%xmm9,%xmm9
+	vmovdqa 400(%rsp),%xmm2
+	vpmuludq 336(%rsp),%xmm2,%xmm2
+	vpaddq %xmm2,%xmm11,%xmm11
+	vpsrlq $26,%xmm1,%xmm2
+	vpaddq %xmm2,%xmm3,%xmm3
+	vpand curve25519_sandy2x_m26(%rip),%xmm1,%xmm1
+	vpsrlq $25,%xmm10,%xmm2
+	vpaddq %xmm2,%xmm11,%xmm11
+	vpand curve25519_sandy2x_m25(%rip),%xmm10,%xmm10
+	vpsrlq $25,%xmm3,%xmm2
+	vpaddq %xmm2,%xmm5,%xmm5
+	vpand curve25519_sandy2x_m25(%rip),%xmm3,%xmm3
+	vpsrlq $26,%xmm11,%xmm2
+	vpaddq %xmm2,%xmm12,%xmm12
+	vpand curve25519_sandy2x_m26(%rip),%xmm11,%xmm11
+	vpsrlq $26,%xmm5,%xmm2
+	vpaddq %xmm2,%xmm7,%xmm7
+	vpand curve25519_sandy2x_m26(%rip),%xmm5,%xmm5
+	vpsrlq $25,%xmm12,%xmm2
+	vpaddq %xmm2,%xmm13,%xmm13
+	vpand curve25519_sandy2x_m25(%rip),%xmm12,%xmm12
+	vpsrlq $25,%xmm7,%xmm2
+	vpaddq %xmm2,%xmm9,%xmm9
+	vpand curve25519_sandy2x_m25(%rip),%xmm7,%xmm7
+	vpsrlq $26,%xmm13,%xmm2
+	vpaddq %xmm2,%xmm0,%xmm0
+	vpand curve25519_sandy2x_m26(%rip),%xmm13,%xmm13
+	vpsrlq $26,%xmm9,%xmm2
+	vpaddq %xmm2,%xmm10,%xmm10
+	vpand curve25519_sandy2x_m26(%rip),%xmm9,%xmm9
+	vpsrlq $25,%xmm0,%xmm2
+	vpsllq $4,%xmm2,%xmm4
+	vpaddq %xmm2,%xmm1,%xmm1
+	vpsllq $1,%xmm2,%xmm2
+	vpaddq %xmm2,%xmm4,%xmm4
+	vpaddq %xmm4,%xmm1,%xmm1
+	vpand curve25519_sandy2x_m25(%rip),%xmm0,%xmm0
+	vpsrlq $25,%xmm10,%xmm2
+	vpaddq %xmm2,%xmm11,%xmm11
+	vpand curve25519_sandy2x_m25(%rip),%xmm10,%xmm10
+	vpsrlq $26,%xmm1,%xmm2
+	vpaddq %xmm2,%xmm3,%xmm3
+	vpand curve25519_sandy2x_m26(%rip),%xmm1,%xmm1
+	vpunpckhqdq %xmm3,%xmm1,%xmm2
+	vpunpcklqdq %xmm3,%xmm1,%xmm1
+	vmovdqa %xmm1,176(%rsp)
+	vpaddq curve25519_sandy2x_subc0(%rip),%xmm2,%xmm3
+	vpsubq %xmm1,%xmm3,%xmm3
+	vpunpckhqdq %xmm3,%xmm2,%xmm1
+	vpunpcklqdq %xmm3,%xmm2,%xmm2
+	vmovdqa %xmm2,192(%rsp)
+	vmovdqa %xmm1,224(%rsp)
+	vpsllq $1,%xmm1,%xmm1
+	vmovdqa %xmm1,240(%rsp)
+	vpmuludq curve25519_sandy2x_v121666_121666(%rip),%xmm3,%xmm3
+	vmovdqa 80(%rsp),%xmm1
+	vpunpcklqdq %xmm1,%xmm3,%xmm2
+	vpunpckhqdq %xmm1,%xmm3,%xmm1
+	vpunpckhqdq %xmm7,%xmm5,%xmm3
+	vpunpcklqdq %xmm7,%xmm5,%xmm4
+	vmovdqa %xmm4,256(%rsp)
+	vpaddq curve25519_sandy2x_subc2(%rip),%xmm3,%xmm5
+	vpsubq %xmm4,%xmm5,%xmm5
+	vpunpckhqdq %xmm5,%xmm3,%xmm4
+	vpunpcklqdq %xmm5,%xmm3,%xmm3
+	vmovdqa %xmm3,272(%rsp)
+	vmovdqa %xmm4,288(%rsp)
+	vpsllq $1,%xmm4,%xmm4
+	vmovdqa %xmm4,304(%rsp)
+	vpmuludq curve25519_sandy2x_v121666_121666(%rip),%xmm5,%xmm5
+	vmovdqa 96(%rsp),%xmm3
+	vpunpcklqdq %xmm3,%xmm5,%xmm4
+	vpunpckhqdq %xmm3,%xmm5,%xmm3
+	vpunpckhqdq %xmm10,%xmm9,%xmm5
+	vpunpcklqdq %xmm10,%xmm9,%xmm6
+	vmovdqa %xmm6,320(%rsp)
+	vpaddq curve25519_sandy2x_subc2(%rip),%xmm5,%xmm7
+	vpsubq %xmm6,%xmm7,%xmm7
+	vpunpckhqdq %xmm7,%xmm5,%xmm6
+	vpunpcklqdq %xmm7,%xmm5,%xmm5
+	vmovdqa %xmm5,336(%rsp)
+	vmovdqa %xmm6,352(%rsp)
+	vpsllq $1,%xmm6,%xmm6
+	vmovdqa %xmm6,368(%rsp)
+	vpmuludq curve25519_sandy2x_v121666_121666(%rip),%xmm7,%xmm7
+	vmovdqa 112(%rsp),%xmm5
+	vpunpcklqdq %xmm5,%xmm7,%xmm6
+	vpunpckhqdq %xmm5,%xmm7,%xmm5
+	vpunpckhqdq %xmm12,%xmm11,%xmm7
+	vpunpcklqdq %xmm12,%xmm11,%xmm8
+	vmovdqa %xmm8,384(%rsp)
+	vpaddq curve25519_sandy2x_subc2(%rip),%xmm7,%xmm9
+	vpsubq %xmm8,%xmm9,%xmm9
+	vpunpckhqdq %xmm9,%xmm7,%xmm8
+	vpunpcklqdq %xmm9,%xmm7,%xmm7
+	vmovdqa %xmm7,400(%rsp)
+	vmovdqa %xmm8,416(%rsp)
+	vpsllq $1,%xmm8,%xmm8
+	vmovdqa %xmm8,432(%rsp)
+	vpmuludq curve25519_sandy2x_v121666_121666(%rip),%xmm9,%xmm9
+	vmovdqa 160(%rsp),%xmm7
+	vpunpcklqdq %xmm7,%xmm9,%xmm8
+	vpunpckhqdq %xmm7,%xmm9,%xmm7
+	vpunpckhqdq %xmm0,%xmm13,%xmm9
+	vpunpcklqdq %xmm0,%xmm13,%xmm0
+	vmovdqa %xmm0,160(%rsp)
+	vpaddq curve25519_sandy2x_subc2(%rip),%xmm9,%xmm10
+	vpsubq %xmm0,%xmm10,%xmm10
+	vpunpckhqdq %xmm10,%xmm9,%xmm0
+	vpunpcklqdq %xmm10,%xmm9,%xmm9
+	vmovdqa %xmm9,448(%rsp)
+	vmovdqa %xmm0,464(%rsp)
+	vpsllq $1,%xmm0,%xmm0
+	vmovdqa %xmm0,480(%rsp)
+	vpmuludq curve25519_sandy2x_v121666_121666(%rip),%xmm10,%xmm10
+	vmovdqa 208(%rsp),%xmm0
+	vpunpcklqdq %xmm0,%xmm10,%xmm9
+	vpunpckhqdq %xmm0,%xmm10,%xmm0
+	vpsrlq $26,%xmm2,%xmm10
+	vpaddq %xmm10,%xmm1,%xmm1
+	vpand curve25519_sandy2x_m26(%rip),%xmm2,%xmm2
+	vpsrlq $25,%xmm5,%xmm10
+	vpaddq %xmm10,%xmm8,%xmm8
+	vpand curve25519_sandy2x_m25(%rip),%xmm5,%xmm5
+	vpsrlq $25,%xmm1,%xmm10
+	vpaddq %xmm10,%xmm4,%xmm4
+	vpand curve25519_sandy2x_m25(%rip),%xmm1,%xmm1
+	vpsrlq $26,%xmm8,%xmm10
+	vpaddq %xmm10,%xmm7,%xmm7
+	vpand curve25519_sandy2x_m26(%rip),%xmm8,%xmm8
+	vpsrlq $26,%xmm4,%xmm10
+	vpaddq %xmm10,%xmm3,%xmm3
+	vpand curve25519_sandy2x_m26(%rip),%xmm4,%xmm4
+	vpsrlq $25,%xmm7,%xmm10
+	vpaddq %xmm10,%xmm9,%xmm9
+	vpand curve25519_sandy2x_m25(%rip),%xmm7,%xmm7
+	vpsrlq $25,%xmm3,%xmm10
+	vpaddq %xmm10,%xmm6,%xmm6
+	vpand curve25519_sandy2x_m25(%rip),%xmm3,%xmm3
+	vpsrlq $26,%xmm9,%xmm10
+	vpaddq %xmm10,%xmm0,%xmm0
+	vpand curve25519_sandy2x_m26(%rip),%xmm9,%xmm9
+	vpsrlq $26,%xmm6,%xmm10
+	vpaddq %xmm10,%xmm5,%xmm5
+	vpand curve25519_sandy2x_m26(%rip),%xmm6,%xmm6
+	vpsrlq $25,%xmm0,%xmm10
+	vpsllq $4,%xmm10,%xmm11
+	vpaddq %xmm10,%xmm2,%xmm2
+	vpsllq $1,%xmm10,%xmm10
+	vpaddq %xmm10,%xmm11,%xmm11
+	vpaddq %xmm11,%xmm2,%xmm2
+	vpand curve25519_sandy2x_m25(%rip),%xmm0,%xmm0
+	vpsrlq $25,%xmm5,%xmm10
+	vpaddq %xmm10,%xmm8,%xmm8
+	vpand curve25519_sandy2x_m25(%rip),%xmm5,%xmm5
+	vpsrlq $26,%xmm2,%xmm10
+	vpaddq %xmm10,%xmm1,%xmm1
+	vpand curve25519_sandy2x_m26(%rip),%xmm2,%xmm2
+	vpunpckhqdq %xmm1,%xmm2,%xmm10
+	vmovdqa %xmm10,80(%rsp)
+	vpunpcklqdq %xmm1,%xmm2,%xmm1
+	vpunpckhqdq %xmm3,%xmm4,%xmm2
+	vmovdqa %xmm2,96(%rsp)
+	vpunpcklqdq %xmm3,%xmm4,%xmm2
+	vpunpckhqdq %xmm5,%xmm6,%xmm3
+	vmovdqa %xmm3,112(%rsp)
+	vpunpcklqdq %xmm5,%xmm6,%xmm3
+	vpunpckhqdq %xmm7,%xmm8,%xmm4
+	vmovdqa %xmm4,128(%rsp)
+	vpunpcklqdq %xmm7,%xmm8,%xmm4
+	vpunpckhqdq %xmm0,%xmm9,%xmm5
+	vmovdqa %xmm5,144(%rsp)
+	vpunpcklqdq %xmm0,%xmm9,%xmm0
+	vmovdqa 176(%rsp),%xmm5
+	vpaddq %xmm5,%xmm1,%xmm1
+	vpunpcklqdq %xmm1,%xmm5,%xmm6
+	vpunpckhqdq %xmm1,%xmm5,%xmm1
+	vpmuludq 224(%rsp),%xmm6,%xmm5
+	vpmuludq 192(%rsp),%xmm1,%xmm7
+	vpaddq %xmm7,%xmm5,%xmm5
+	vpmuludq 272(%rsp),%xmm6,%xmm7
+	vpmuludq 240(%rsp),%xmm1,%xmm8
+	vpaddq %xmm8,%xmm7,%xmm7
+	vpmuludq 288(%rsp),%xmm6,%xmm8
+	vpmuludq 272(%rsp),%xmm1,%xmm9
+	vpaddq %xmm9,%xmm8,%xmm8
+	vpmuludq 336(%rsp),%xmm6,%xmm9
+	vpmuludq 304(%rsp),%xmm1,%xmm10
+	vpaddq %xmm10,%xmm9,%xmm9
+	vpmuludq 352(%rsp),%xmm6,%xmm10
+	vpmuludq 336(%rsp),%xmm1,%xmm11
+	vpaddq %xmm11,%xmm10,%xmm10
+	vpmuludq 400(%rsp),%xmm6,%xmm11
+	vpmuludq 368(%rsp),%xmm1,%xmm12
+	vpaddq %xmm12,%xmm11,%xmm11
+	vpmuludq 416(%rsp),%xmm6,%xmm12
+	vpmuludq 400(%rsp),%xmm1,%xmm13
+	vpaddq %xmm13,%xmm12,%xmm12
+	vpmuludq 448(%rsp),%xmm6,%xmm13
+	vpmuludq 432(%rsp),%xmm1,%xmm14
+	vpaddq %xmm14,%xmm13,%xmm13
+	vpmuludq 464(%rsp),%xmm6,%xmm14
+	vpmuludq 448(%rsp),%xmm1,%xmm15
+	vpaddq %xmm15,%xmm14,%xmm14
+	vpmuludq 192(%rsp),%xmm6,%xmm6
+	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm1,%xmm1
+	vpmuludq 480(%rsp),%xmm1,%xmm1
+	vpaddq %xmm1,%xmm6,%xmm6
+	vmovdqa 256(%rsp),%xmm1
+	vpaddq %xmm1,%xmm2,%xmm2
+	vpunpcklqdq %xmm2,%xmm1,%xmm15
+	vpunpckhqdq %xmm2,%xmm1,%xmm1
+	vpmuludq 192(%rsp),%xmm15,%xmm2
+	vpaddq %xmm2,%xmm7,%xmm7
+	vpmuludq 224(%rsp),%xmm15,%xmm2
+	vpaddq %xmm2,%xmm8,%xmm8
+	vpmuludq 272(%rsp),%xmm15,%xmm2
+	vpaddq %xmm2,%xmm9,%xmm9
+	vpmuludq 288(%rsp),%xmm15,%xmm2
+	vpaddq %xmm2,%xmm10,%xmm10
+	vpmuludq 336(%rsp),%xmm15,%xmm2
+	vpaddq %xmm2,%xmm11,%xmm11
+	vpmuludq 352(%rsp),%xmm15,%xmm2
+	vpaddq %xmm2,%xmm12,%xmm12
+	vpmuludq 400(%rsp),%xmm15,%xmm2
+	vpaddq %xmm2,%xmm13,%xmm13
+	vpmuludq 416(%rsp),%xmm15,%xmm2
+	vpaddq %xmm2,%xmm14,%xmm14
+	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm15,%xmm15
+	vpmuludq 448(%rsp),%xmm15,%xmm2
+	vpaddq %xmm2,%xmm6,%xmm6
+	vpmuludq 464(%rsp),%xmm15,%xmm15
+	vpaddq %xmm15,%xmm5,%xmm5
+	vpmuludq 192(%rsp),%xmm1,%xmm2
+	vpaddq %xmm2,%xmm8,%xmm8
+	vpmuludq 240(%rsp),%xmm1,%xmm2
+	vpaddq %xmm2,%xmm9,%xmm9
+	vpmuludq 272(%rsp),%xmm1,%xmm2
+	vpaddq %xmm2,%xmm10,%xmm10
+	vpmuludq 304(%rsp),%xmm1,%xmm2
+	vpaddq %xmm2,%xmm11,%xmm11
+	vpmuludq 336(%rsp),%xmm1,%xmm2
+	vpaddq %xmm2,%xmm12,%xmm12
+	vpmuludq 368(%rsp),%xmm1,%xmm2
+	vpaddq %xmm2,%xmm13,%xmm13
+	vpmuludq 400(%rsp),%xmm1,%xmm2
+	vpaddq %xmm2,%xmm14,%xmm14
+	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm1,%xmm1
+	vpmuludq 432(%rsp),%xmm1,%xmm2
+	vpaddq %xmm2,%xmm6,%xmm6
+	vpmuludq 448(%rsp),%xmm1,%xmm2
+	vpaddq %xmm2,%xmm5,%xmm5
+	vpmuludq 480(%rsp),%xmm1,%xmm1
+	vpaddq %xmm1,%xmm7,%xmm7
+	vmovdqa 320(%rsp),%xmm1
+	vpaddq %xmm1,%xmm3,%xmm3
+	vpunpcklqdq %xmm3,%xmm1,%xmm2
+	vpunpckhqdq %xmm3,%xmm1,%xmm1
+	vpmuludq 192(%rsp),%xmm2,%xmm3
+	vpaddq %xmm3,%xmm9,%xmm9
+	vpmuludq 224(%rsp),%xmm2,%xmm3
+	vpaddq %xmm3,%xmm10,%xmm10
+	vpmuludq 272(%rsp),%xmm2,%xmm3
+	vpaddq %xmm3,%xmm11,%xmm11
+	vpmuludq 288(%rsp),%xmm2,%xmm3
+	vpaddq %xmm3,%xmm12,%xmm12
+	vpmuludq 336(%rsp),%xmm2,%xmm3
+	vpaddq %xmm3,%xmm13,%xmm13
+	vpmuludq 352(%rsp),%xmm2,%xmm3
+	vpaddq %xmm3,%xmm14,%xmm14
+	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm2,%xmm2
+	vpmuludq 400(%rsp),%xmm2,%xmm3
+	vpaddq %xmm3,%xmm6,%xmm6
+	vpmuludq 416(%rsp),%xmm2,%xmm3
+	vpaddq %xmm3,%xmm5,%xmm5
+	vpmuludq 448(%rsp),%xmm2,%xmm3
+	vpaddq %xmm3,%xmm7,%xmm7
+	vpmuludq 464(%rsp),%xmm2,%xmm2
+	vpaddq %xmm2,%xmm8,%xmm8
+	vpmuludq 192(%rsp),%xmm1,%xmm2
+	vpaddq %xmm2,%xmm10,%xmm10
+	vpmuludq 240(%rsp),%xmm1,%xmm2
+	vpaddq %xmm2,%xmm11,%xmm11
+	vpmuludq 272(%rsp),%xmm1,%xmm2
+	vpaddq %xmm2,%xmm12,%xmm12
+	vpmuludq 304(%rsp),%xmm1,%xmm2
+	vpaddq %xmm2,%xmm13,%xmm13
+	vpmuludq 336(%rsp),%xmm1,%xmm2
+	vpaddq %xmm2,%xmm14,%xmm14
+	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm1,%xmm1
+	vpmuludq 368(%rsp),%xmm1,%xmm2
+	vpaddq %xmm2,%xmm6,%xmm6
+	vpmuludq 400(%rsp),%xmm1,%xmm2
+	vpaddq %xmm2,%xmm5,%xmm5
+	vpmuludq 432(%rsp),%xmm1,%xmm2
+	vpaddq %xmm2,%xmm7,%xmm7
+	vpmuludq 448(%rsp),%xmm1,%xmm2
+	vpaddq %xmm2,%xmm8,%xmm8
+	vpmuludq 480(%rsp),%xmm1,%xmm1
+	vpaddq %xmm1,%xmm9,%xmm9
+	vmovdqa 384(%rsp),%xmm1
+	vpaddq %xmm1,%xmm4,%xmm4
+	vpunpcklqdq %xmm4,%xmm1,%xmm2
+	vpunpckhqdq %xmm4,%xmm1,%xmm1
+	vpmuludq 192(%rsp),%xmm2,%xmm3
+	vpaddq %xmm3,%xmm11,%xmm11
+	vpmuludq 224(%rsp),%xmm2,%xmm3
+	vpaddq %xmm3,%xmm12,%xmm12
+	vpmuludq 272(%rsp),%xmm2,%xmm3
+	vpaddq %xmm3,%xmm13,%xmm13
+	vpmuludq 288(%rsp),%xmm2,%xmm3
+	vpaddq %xmm3,%xmm14,%xmm14
+	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm2,%xmm2
+	vpmuludq 336(%rsp),%xmm2,%xmm3
+	vpaddq %xmm3,%xmm6,%xmm6
+	vpmuludq 352(%rsp),%xmm2,%xmm3
+	vpaddq %xmm3,%xmm5,%xmm5
+	vpmuludq 400(%rsp),%xmm2,%xmm3
+	vpaddq %xmm3,%xmm7,%xmm7
+	vpmuludq 416(%rsp),%xmm2,%xmm3
+	vpaddq %xmm3,%xmm8,%xmm8
+	vpmuludq 448(%rsp),%xmm2,%xmm3
+	vpaddq %xmm3,%xmm9,%xmm9
+	vpmuludq 464(%rsp),%xmm2,%xmm2
+	vpaddq %xmm2,%xmm10,%xmm10
+	vpmuludq 192(%rsp),%xmm1,%xmm2
+	vpaddq %xmm2,%xmm12,%xmm12
+	vpmuludq 240(%rsp),%xmm1,%xmm2
+	vpaddq %xmm2,%xmm13,%xmm13
+	vpmuludq 272(%rsp),%xmm1,%xmm2
+	vpaddq %xmm2,%xmm14,%xmm14
+	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm1,%xmm1
+	vpmuludq 304(%rsp),%xmm1,%xmm2
+	vpaddq %xmm2,%xmm6,%xmm6
+	vpmuludq 336(%rsp),%xmm1,%xmm2
+	vpaddq %xmm2,%xmm5,%xmm5
+	vpmuludq 368(%rsp),%xmm1,%xmm2
+	vpaddq %xmm2,%xmm7,%xmm7
+	vpmuludq 400(%rsp),%xmm1,%xmm2
+	vpaddq %xmm2,%xmm8,%xmm8
+	vpmuludq 432(%rsp),%xmm1,%xmm2
+	vpaddq %xmm2,%xmm9,%xmm9
+	vpmuludq 448(%rsp),%xmm1,%xmm2
+	vpaddq %xmm2,%xmm10,%xmm10
+	vpmuludq 480(%rsp),%xmm1,%xmm1
+	vpaddq %xmm1,%xmm11,%xmm11
+	vmovdqa 160(%rsp),%xmm1
+	vpaddq %xmm1,%xmm0,%xmm0
+	vpunpcklqdq %xmm0,%xmm1,%xmm2
+	vpunpckhqdq %xmm0,%xmm1,%xmm0
+	vpmuludq 192(%rsp),%xmm2,%xmm1
+	vpaddq %xmm1,%xmm13,%xmm13
+	vpmuludq 224(%rsp),%xmm2,%xmm1
+	vpaddq %xmm1,%xmm14,%xmm14
+	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm2,%xmm2
+	vpmuludq 272(%rsp),%xmm2,%xmm1
+	vpaddq %xmm1,%xmm6,%xmm6
+	vpmuludq 288(%rsp),%xmm2,%xmm1
+	vpaddq %xmm1,%xmm5,%xmm5
+	vpmuludq 336(%rsp),%xmm2,%xmm1
+	vpaddq %xmm1,%xmm7,%xmm7
+	vpmuludq 352(%rsp),%xmm2,%xmm1
+	vpaddq %xmm1,%xmm8,%xmm8
+	vpmuludq 400(%rsp),%xmm2,%xmm1
+	vpaddq %xmm1,%xmm9,%xmm9
+	vpmuludq 416(%rsp),%xmm2,%xmm1
+	vpaddq %xmm1,%xmm10,%xmm10
+	vpmuludq 448(%rsp),%xmm2,%xmm1
+	vpaddq %xmm1,%xmm11,%xmm11
+	vpmuludq 464(%rsp),%xmm2,%xmm2
+	vpaddq %xmm2,%xmm12,%xmm12
+	vpmuludq 192(%rsp),%xmm0,%xmm1
+	vpaddq %xmm1,%xmm14,%xmm14
+	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm0,%xmm0
+	vpmuludq 240(%rsp),%xmm0,%xmm1
+	vpaddq %xmm1,%xmm6,%xmm6
+	vpmuludq 272(%rsp),%xmm0,%xmm1
+	vpaddq %xmm1,%xmm5,%xmm5
+	vpmuludq 304(%rsp),%xmm0,%xmm1
+	vpaddq %xmm1,%xmm7,%xmm7
+	vpmuludq 336(%rsp),%xmm0,%xmm1
+	vpaddq %xmm1,%xmm8,%xmm8
+	vpmuludq 368(%rsp),%xmm0,%xmm1
+	vpaddq %xmm1,%xmm9,%xmm9
+	vpmuludq 400(%rsp),%xmm0,%xmm1
+	vpaddq %xmm1,%xmm10,%xmm10
+	vpmuludq 432(%rsp),%xmm0,%xmm1
+	vpaddq %xmm1,%xmm11,%xmm11
+	vpmuludq 448(%rsp),%xmm0,%xmm1
+	vpaddq %xmm1,%xmm12,%xmm12
+	vpmuludq 480(%rsp),%xmm0,%xmm0
+	vpaddq %xmm0,%xmm13,%xmm13
+	vpsrlq $26,%xmm6,%xmm0
+	vpaddq %xmm0,%xmm5,%xmm5
+	vpand curve25519_sandy2x_m26(%rip),%xmm6,%xmm6
+	vpsrlq $25,%xmm10,%xmm0
+	vpaddq %xmm0,%xmm11,%xmm11
+	vpand curve25519_sandy2x_m25(%rip),%xmm10,%xmm10
+	vpsrlq $25,%xmm5,%xmm0
+	vpaddq %xmm0,%xmm7,%xmm7
+	vpand curve25519_sandy2x_m25(%rip),%xmm5,%xmm5
+	vpsrlq $26,%xmm11,%xmm0
+	vpaddq %xmm0,%xmm12,%xmm12
+	vpand curve25519_sandy2x_m26(%rip),%xmm11,%xmm11
+	vpsrlq $26,%xmm7,%xmm0
+	vpaddq %xmm0,%xmm8,%xmm8
+	vpand curve25519_sandy2x_m26(%rip),%xmm7,%xmm7
+	vpsrlq $25,%xmm12,%xmm0
+	vpaddq %xmm0,%xmm13,%xmm13
+	vpand curve25519_sandy2x_m25(%rip),%xmm12,%xmm12
+	vpsrlq $25,%xmm8,%xmm0
+	vpaddq %xmm0,%xmm9,%xmm9
+	vpand curve25519_sandy2x_m25(%rip),%xmm8,%xmm8
+	vpsrlq $26,%xmm13,%xmm0
+	vpaddq %xmm0,%xmm14,%xmm14
+	vpand curve25519_sandy2x_m26(%rip),%xmm13,%xmm13
+	vpsrlq $26,%xmm9,%xmm0
+	vpaddq %xmm0,%xmm10,%xmm10
+	vpand curve25519_sandy2x_m26(%rip),%xmm9,%xmm9
+	vpsrlq $25,%xmm14,%xmm0
+	vpsllq $4,%xmm0,%xmm1
+	vpaddq %xmm0,%xmm6,%xmm6
+	vpsllq $1,%xmm0,%xmm0
+	vpaddq %xmm0,%xmm1,%xmm1
+	vpaddq %xmm1,%xmm6,%xmm6
+	vpand curve25519_sandy2x_m25(%rip),%xmm14,%xmm14
+	vpsrlq $25,%xmm10,%xmm0
+	vpaddq %xmm0,%xmm11,%xmm11
+	vpand curve25519_sandy2x_m25(%rip),%xmm10,%xmm10
+	vpsrlq $26,%xmm6,%xmm0
+	vpaddq %xmm0,%xmm5,%xmm5
+	vpand curve25519_sandy2x_m26(%rip),%xmm6,%xmm6
+	vpunpckhqdq %xmm5,%xmm6,%xmm1
+	vpunpcklqdq %xmm5,%xmm6,%xmm0
+	vpunpckhqdq %xmm8,%xmm7,%xmm3
+	vpunpcklqdq %xmm8,%xmm7,%xmm2
+	vpunpckhqdq %xmm10,%xmm9,%xmm5
+	vpunpcklqdq %xmm10,%xmm9,%xmm4
+	vpunpckhqdq %xmm12,%xmm11,%xmm7
+	vpunpcklqdq %xmm12,%xmm11,%xmm6
+	vpunpckhqdq %xmm14,%xmm13,%xmm9
+	vpunpcklqdq %xmm14,%xmm13,%xmm8
+	cmp $0,%rdx
+	jne .Lladder_base_loop
+	vmovdqu %xmm1,80(%rdi)
+	vmovdqu %xmm0,0(%rdi)
+	vmovdqu %xmm3,96(%rdi)
+	vmovdqu %xmm2,16(%rdi)
+	vmovdqu %xmm5,112(%rdi)
+	vmovdqu %xmm4,32(%rdi)
+	vmovdqu %xmm7,128(%rdi)
+	vmovdqu %xmm6,48(%rdi)
+	vmovdqu %xmm9,144(%rdi)
+	vmovdqu %xmm8,64(%rdi)
+	movq 1536(%rsp),%r11
+	movq 1544(%rsp),%r12
+	movq 1552(%rsp),%r13
+	leave
+	ret
+ENDPROC(curve25519_sandy2x_ladder_base)
+#endif /* CONFIG_AS_AVX */
diff --git b/net/wireguard/crypto/curve25519.c b/net/wireguard/crypto/curve25519.c
new file mode 100644
index 0000000..adf6a0b
--- /dev/null
+++ b/net/wireguard/crypto/curve25519.c
@@ -0,0 +1,1689 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright (C) 2008 Google Inc. All Rights Reserved.
+ * Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ *
+ * Original author: Adam Langley <agl@imperialviolet.org>
+ */
+
+#include "curve25519.h"
+
+#include <linux/version.h>
+#include <linux/string.h>
+#include <linux/random.h>
+#include <crypto/algapi.h>
+
+#define ARCH_HAS_SEPARATE_IRQ_STACK
+#if (defined(CONFIG_MIPS) && LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)) || defined(CONFIG_ARM)
+#undef ARCH_HAS_SEPARATE_IRQ_STACK
+#endif
+
+static __always_inline void normalize_secret(u8 secret[CURVE25519_POINT_SIZE])
+{
+	secret[0] &= 248;
+	secret[31] &= 127;
+	secret[31] |= 64;
+}
+
+#if defined(CONFIG_X86_64) && defined(CONFIG_AS_AVX)
+#include <asm/cpufeature.h>
+#include <asm/processor.h>
+#include <asm/fpu/api.h>
+#include <asm/simd.h>
+static bool curve25519_use_avx __read_mostly;
+void __init curve25519_fpu_init(void)
+{
+	curve25519_use_avx = boot_cpu_has(X86_FEATURE_AVX) && cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
+}
+
+typedef u64 fe[10];
+typedef u64 fe51[5];
+asmlinkage void curve25519_sandy2x_ladder(fe *, const u8 *);
+asmlinkage void curve25519_sandy2x_ladder_base(fe *, const u8 *);
+asmlinkage void curve25519_sandy2x_fe51_pack(u8 *, const fe51 *);
+asmlinkage void curve25519_sandy2x_fe51_mul(fe51 *, const fe51 *, const fe51 *);
+asmlinkage void curve25519_sandy2x_fe51_nsquare(fe51 *, const fe51 *, int);
+
+static inline u32 le24_to_cpupv(const u8 *in)
+{
+	return le16_to_cpup((__le16 *)in) | ((u32)in[2]) << 16;
+}
+
+static inline void fe_frombytes(fe h, const u8 *s)
+{
+	u64 h0 = le32_to_cpup((__le32 *)s);
+	u64 h1 = le24_to_cpupv(s + 4) << 6;
+	u64 h2 = le24_to_cpupv(s + 7) << 5;
+	u64 h3 = le24_to_cpupv(s + 10) << 3;
+	u64 h4 = le24_to_cpupv(s + 13) << 2;
+	u64 h5 = le32_to_cpup((__le32 *)(s + 16));
+	u64 h6 = le24_to_cpupv(s + 20) << 7;
+	u64 h7 = le24_to_cpupv(s + 23) << 5;
+	u64 h8 = le24_to_cpupv(s + 26) << 4;
+	u64 h9 = (le24_to_cpupv(s + 29) & 8388607) << 2;
+	u64 carry0, carry1, carry2, carry3, carry4, carry5, carry6, carry7, carry8, carry9;
+
+	carry9 = h9 >> 25; h0 += carry9 * 19; h9 &= 0x1FFFFFF;
+	carry1 = h1 >> 25; h2 += carry1; h1 &= 0x1FFFFFF;
+	carry3 = h3 >> 25; h4 += carry3; h3 &= 0x1FFFFFF;
+	carry5 = h5 >> 25; h6 += carry5; h5 &= 0x1FFFFFF;
+	carry7 = h7 >> 25; h8 += carry7; h7 &= 0x1FFFFFF;
+
+	carry0 = h0 >> 26; h1 += carry0; h0 &= 0x3FFFFFF;
+	carry2 = h2 >> 26; h3 += carry2; h2 &= 0x3FFFFFF;
+	carry4 = h4 >> 26; h5 += carry4; h4 &= 0x3FFFFFF;
+	carry6 = h6 >> 26; h7 += carry6; h6 &= 0x3FFFFFF;
+	carry8 = h8 >> 26; h9 += carry8; h8 &= 0x3FFFFFF;
+
+	h[0] = h0;
+	h[1] = h1;
+	h[2] = h2;
+	h[3] = h3;
+	h[4] = h4;
+	h[5] = h5;
+	h[6] = h6;
+	h[7] = h7;
+	h[8] = h8;
+	h[9] = h9;
+}
+
+static inline void fe51_invert(fe51 *r, const fe51 *x)
+{
+	fe51 z2, z9, z11, z2_5_0, z2_10_0, z2_20_0, z2_50_0, z2_100_0, t;
+
+	/* 2 */ curve25519_sandy2x_fe51_nsquare(&z2, x, 1);
+	/* 4 */ curve25519_sandy2x_fe51_nsquare(&t, (const fe51 *)&z2, 1);
+	/* 8 */ curve25519_sandy2x_fe51_nsquare(&t, (const fe51 *)&t, 1);
+	/* 9 */ curve25519_sandy2x_fe51_mul(&z9, (const fe51 *)&t, x);
+	/* 11 */ curve25519_sandy2x_fe51_mul(&z11, (const fe51 *)&z9, (const fe51 *)&z2);
+	/* 22 */ curve25519_sandy2x_fe51_nsquare(&t, (const fe51 *)&z11, 1);
+	/* 2^5 - 2^0 = 31 */ curve25519_sandy2x_fe51_mul(&z2_5_0, (const fe51 *)&t, (const fe51 *)&z9);
+
+	/* 2^10 - 2^5 */ curve25519_sandy2x_fe51_nsquare(&t, (const fe51 *)&z2_5_0, 5);
+	/* 2^10 - 2^0 */ curve25519_sandy2x_fe51_mul(&z2_10_0, (const fe51 *)&t, (const fe51 *)&z2_5_0);
+
+	/* 2^20 - 2^10 */ curve25519_sandy2x_fe51_nsquare(&t, (const fe51 *)&z2_10_0, 10);
+	/* 2^20 - 2^0 */ curve25519_sandy2x_fe51_mul(&z2_20_0, (const fe51 *)&t, (const fe51 *)&z2_10_0);
+
+	/* 2^40 - 2^20 */ curve25519_sandy2x_fe51_nsquare(&t, (const fe51 *)&z2_20_0, 20);
+	/* 2^40 - 2^0 */ curve25519_sandy2x_fe51_mul(&t, (const fe51 *)&t, (const fe51 *)&z2_20_0);
+
+	/* 2^50 - 2^10 */ curve25519_sandy2x_fe51_nsquare(&t, (const fe51 *)&t, 10);
+	/* 2^50 - 2^0 */ curve25519_sandy2x_fe51_mul(&z2_50_0, (const fe51 *)&t, (const fe51 *)&z2_10_0);
+
+	/* 2^100 - 2^50 */ curve25519_sandy2x_fe51_nsquare(&t, (const fe51 *)&z2_50_0, 50);
+	/* 2^100 - 2^0 */ curve25519_sandy2x_fe51_mul(&z2_100_0, (const fe51 *)&t, (const fe51 *)&z2_50_0);
+
+	/* 2^200 - 2^100 */ curve25519_sandy2x_fe51_nsquare(&t, (const fe51 *)&z2_100_0, 100);
+	/* 2^200 - 2^0 */ curve25519_sandy2x_fe51_mul(&t, (const fe51 *)&t, (const fe51 *)&z2_100_0);
+
+	/* 2^250 - 2^50 */ curve25519_sandy2x_fe51_nsquare(&t, (const fe51 *)&t, 50);
+	/* 2^250 - 2^0 */ curve25519_sandy2x_fe51_mul(&t, (const fe51 *)&t, (const fe51 *)&z2_50_0);
+
+	/* 2^255 - 2^5 */ curve25519_sandy2x_fe51_nsquare(&t, (const fe51 *)&t, 5);
+	/* 2^255 - 21 */ curve25519_sandy2x_fe51_mul(r, (const fe51 *)t, (const fe51 *)&z11);
+}
+
+static void curve25519_sandy2x(u8 mypublic[CURVE25519_POINT_SIZE], const u8 secret[CURVE25519_POINT_SIZE], const u8 basepoint[CURVE25519_POINT_SIZE])
+{
+	u8 e[32];
+	fe var[3];
+	fe51 x_51, z_51;
+
+	memcpy(e, secret, 32);
+	normalize_secret(e);
+#define x1 var[0]
+#define x2 var[1]
+#define z2 var[2]
+	fe_frombytes(x1, basepoint);
+	curve25519_sandy2x_ladder(var, e);
+	z_51[0] = (z2[1] << 26) + z2[0];
+	z_51[1] = (z2[3] << 26) + z2[2];
+	z_51[2] = (z2[5] << 26) + z2[4];
+	z_51[3] = (z2[7] << 26) + z2[6];
+	z_51[4] = (z2[9] << 26) + z2[8];
+	x_51[0] = (x2[1] << 26) + x2[0];
+	x_51[1] = (x2[3] << 26) + x2[2];
+	x_51[2] = (x2[5] << 26) + x2[4];
+	x_51[3] = (x2[7] << 26) + x2[6];
+	x_51[4] = (x2[9] << 26) + x2[8];
+#undef x1
+#undef x2
+#undef z2
+	fe51_invert(&z_51, (const fe51 *)&z_51);
+	curve25519_sandy2x_fe51_mul(&x_51, (const fe51 *)&x_51, (const fe51 *)&z_51);
+	curve25519_sandy2x_fe51_pack(mypublic, (const fe51 *)&x_51);
+
+	memzero_explicit(e, sizeof(e));
+	memzero_explicit(var, sizeof(var));
+	memzero_explicit(x_51, sizeof(x_51));
+	memzero_explicit(z_51, sizeof(z_51));
+}
+
+static void curve25519_sandy2x_base(u8 pub[CURVE25519_POINT_SIZE], const u8 secret[CURVE25519_POINT_SIZE])
+{
+	u8 e[32];
+	fe var[3];
+	fe51 x_51, z_51;
+
+	memcpy(e, secret, 32);
+	normalize_secret(e);
+	curve25519_sandy2x_ladder_base(var, e);
+#define x2 var[0]
+#define z2 var[1]
+	z_51[0] = (z2[1] << 26) + z2[0];
+	z_51[1] = (z2[3] << 26) + z2[2];
+	z_51[2] = (z2[5] << 26) + z2[4];
+	z_51[3] = (z2[7] << 26) + z2[6];
+	z_51[4] = (z2[9] << 26) + z2[8];
+	x_51[0] = (x2[1] << 26) + x2[0];
+	x_51[1] = (x2[3] << 26) + x2[2];
+	x_51[2] = (x2[5] << 26) + x2[4];
+	x_51[3] = (x2[7] << 26) + x2[6];
+	x_51[4] = (x2[9] << 26) + x2[8];
+#undef x2
+#undef z2
+	fe51_invert(&z_51, (const fe51 *)&z_51);
+	curve25519_sandy2x_fe51_mul(&x_51, (const fe51 *)&x_51, (const fe51 *)&z_51);
+	curve25519_sandy2x_fe51_pack(pub, (const fe51 *)&x_51);
+
+	memzero_explicit(e, sizeof(e));
+	memzero_explicit(var, sizeof(var));
+	memzero_explicit(x_51, sizeof(x_51));
+	memzero_explicit(z_51, sizeof(z_51));
+}
+#elif IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && defined(CONFIG_ARM)
+#include <asm/hwcap.h>
+#include <asm/neon.h>
+#include <asm/simd.h>
+asmlinkage void curve25519_neon(u8 mypublic[CURVE25519_POINT_SIZE], const u8 secret[CURVE25519_POINT_SIZE], const u8 basepoint[CURVE25519_POINT_SIZE]);
+static bool curve25519_use_neon __read_mostly;
+void __init curve25519_fpu_init(void)
+{
+	curve25519_use_neon = elf_hwcap & HWCAP_NEON;
+}
+#else
+void __init curve25519_fpu_init(void) { }
+#endif
+
+#if defined(CONFIG_ARCH_SUPPORTS_INT128) && defined(__SIZEOF_INT128__)
+typedef u64 limb;
+typedef limb felem[5];
+typedef __uint128_t u128;
+
+/* Sum two numbers: output += in */
+static __always_inline void fsum(limb *output, const limb *in)
+{
+	output[0] += in[0];
+	output[1] += in[1];
+	output[2] += in[2];
+	output[3] += in[3];
+	output[4] += in[4];
+}
+
+/* Find the difference of two numbers: output = in - output
+ * (note the order of the arguments!)
+ *
+ * Assumes that out[i] < 2**52
+ * On return, out[i] < 2**55
+ */
+static __always_inline void fdifference_backwards(felem out, const felem in)
+{
+	/* 152 is 19 << 3 */
+	static const limb two54m152 = (((limb)1) << 54) - 152;
+	static const limb two54m8 = (((limb)1) << 54) - 8;
+
+	out[0] = in[0] + two54m152 - out[0];
+	out[1] = in[1] + two54m8 - out[1];
+	out[2] = in[2] + two54m8 - out[2];
+	out[3] = in[3] + two54m8 - out[3];
+	out[4] = in[4] + two54m8 - out[4];
+}
+
+/* Multiply a number by a scalar: output = in * scalar */
+static __always_inline void fscalar_product(felem output, const felem in, const limb scalar)
+{
+	u128 a;
+
+	a = ((u128) in[0]) * scalar;
+	output[0] = ((limb)a) & 0x7ffffffffffffUL;
+
+	a = ((u128) in[1]) * scalar + ((limb) (a >> 51));
+	output[1] = ((limb)a) & 0x7ffffffffffffUL;
+
+	a = ((u128) in[2]) * scalar + ((limb) (a >> 51));
+	output[2] = ((limb)a) & 0x7ffffffffffffUL;
+
+	a = ((u128) in[3]) * scalar + ((limb) (a >> 51));
+	output[3] = ((limb)a) & 0x7ffffffffffffUL;
+
+	a = ((u128) in[4]) * scalar + ((limb) (a >> 51));
+	output[4] = ((limb)a) & 0x7ffffffffffffUL;
+
+	output[0] += (a >> 51) * 19;
+}
+
+/* Multiply two numbers: output = in2 * in
+ *
+ * output must be distinct to both inputs. The inputs are reduced coefficient
+ * form, the output is not.
+ *
+ * Assumes that in[i] < 2**55 and likewise for in2.
+ * On return, output[i] < 2**52
+ */
+static __always_inline void fmul(felem output, const felem in2, const felem in)
+{
+	u128 t[5];
+	limb r0, r1, r2, r3, r4, s0, s1, s2, s3, s4, c;
+
+	r0 = in[0];
+	r1 = in[1];
+	r2 = in[2];
+	r3 = in[3];
+	r4 = in[4];
+
+	s0 = in2[0];
+	s1 = in2[1];
+	s2 = in2[2];
+	s3 = in2[3];
+	s4 = in2[4];
+
+	t[0]  =  ((u128) r0) * s0;
+	t[1]  =  ((u128) r0) * s1 + ((u128) r1) * s0;
+	t[2]  =  ((u128) r0) * s2 + ((u128) r2) * s0 + ((u128) r1) * s1;
+	t[3]  =  ((u128) r0) * s3 + ((u128) r3) * s0 + ((u128) r1) * s2 + ((u128) r2) * s1;
+	t[4]  =  ((u128) r0) * s4 + ((u128) r4) * s0 + ((u128) r3) * s1 + ((u128) r1) * s3 + ((u128) r2) * s2;
+
+	r4 *= 19;
+	r1 *= 19;
+	r2 *= 19;
+	r3 *= 19;
+
+	t[0] += ((u128) r4) * s1 + ((u128) r1) * s4 + ((u128) r2) * s3 + ((u128) r3) * s2;
+	t[1] += ((u128) r4) * s2 + ((u128) r2) * s4 + ((u128) r3) * s3;
+	t[2] += ((u128) r4) * s3 + ((u128) r3) * s4;
+	t[3] += ((u128) r4) * s4;
+
+			r0 = (limb)t[0] & 0x7ffffffffffffUL; c = (limb)(t[0] >> 51);
+	t[1] += c;      r1 = (limb)t[1] & 0x7ffffffffffffUL; c = (limb)(t[1] >> 51);
+	t[2] += c;      r2 = (limb)t[2] & 0x7ffffffffffffUL; c = (limb)(t[2] >> 51);
+	t[3] += c;      r3 = (limb)t[3] & 0x7ffffffffffffUL; c = (limb)(t[3] >> 51);
+	t[4] += c;      r4 = (limb)t[4] & 0x7ffffffffffffUL; c = (limb)(t[4] >> 51);
+	r0 +=   c * 19; c = r0 >> 51; r0 = r0 & 0x7ffffffffffffUL;
+	r1 +=   c;      c = r1 >> 51; r1 = r1 & 0x7ffffffffffffUL;
+	r2 +=   c;
+
+	output[0] = r0;
+	output[1] = r1;
+	output[2] = r2;
+	output[3] = r3;
+	output[4] = r4;
+}
+
+static __always_inline void fsquare_times(felem output, const felem in, limb count)
+{
+	u128 t[5];
+	limb r0, r1, r2, r3, r4, c;
+	limb d0, d1, d2, d4, d419;
+
+	r0 = in[0];
+	r1 = in[1];
+	r2 = in[2];
+	r3 = in[3];
+	r4 = in[4];
+
+	do {
+		d0 = r0 * 2;
+		d1 = r1 * 2;
+		d2 = r2 * 2 * 19;
+		d419 = r4 * 19;
+		d4 = d419 * 2;
+
+		t[0] = ((u128) r0) * r0 + ((u128) d4) * r1 + (((u128) d2) * (r3     ));
+		t[1] = ((u128) d0) * r1 + ((u128) d4) * r2 + (((u128) r3) * (r3 * 19));
+		t[2] = ((u128) d0) * r2 + ((u128) r1) * r1 + (((u128) d4) * (r3     ));
+		t[3] = ((u128) d0) * r3 + ((u128) d1) * r2 + (((u128) r4) * (d419   ));
+		t[4] = ((u128) d0) * r4 + ((u128) d1) * r3 + (((u128) r2) * (r2     ));
+
+				r0 = (limb)t[0] & 0x7ffffffffffffUL; c = (limb)(t[0] >> 51);
+		t[1] += c;      r1 = (limb)t[1] & 0x7ffffffffffffUL; c = (limb)(t[1] >> 51);
+		t[2] += c;      r2 = (limb)t[2] & 0x7ffffffffffffUL; c = (limb)(t[2] >> 51);
+		t[3] += c;      r3 = (limb)t[3] & 0x7ffffffffffffUL; c = (limb)(t[3] >> 51);
+		t[4] += c;      r4 = (limb)t[4] & 0x7ffffffffffffUL; c = (limb)(t[4] >> 51);
+		r0 +=   c * 19; c = r0 >> 51; r0 = r0 & 0x7ffffffffffffUL;
+		r1 +=   c;      c = r1 >> 51; r1 = r1 & 0x7ffffffffffffUL;
+		r2 +=   c;
+	} while (--count);
+
+	output[0] = r0;
+	output[1] = r1;
+	output[2] = r2;
+	output[3] = r3;
+	output[4] = r4;
+}
+
+/* Load a little-endian 64-bit number  */
+static inline limb load_limb(const u8 *in)
+{
+	return le64_to_cpu(*(__le64 *)in);
+}
+
+static inline void store_limb(u8 *out, limb in)
+{
+	*(__le64 *)out = cpu_to_le64(in);
+}
+
+/* Take a little-endian, 32-byte number and expand it into polynomial form */
+static inline void fexpand(limb *output, const u8 *in)
+{
+	output[0] = load_limb(in) & 0x7ffffffffffffUL;
+	output[1] = (load_limb(in + 6) >> 3) & 0x7ffffffffffffUL;
+	output[2] = (load_limb(in + 12) >> 6) & 0x7ffffffffffffUL;
+	output[3] = (load_limb(in + 19) >> 1) & 0x7ffffffffffffUL;
+	output[4] = (load_limb(in + 24) >> 12) & 0x7ffffffffffffUL;
+}
+
+/* Take a fully reduced polynomial form number and contract it into a
+ * little-endian, 32-byte array
+ */
+static void fcontract(u8 *output, const felem input)
+{
+	u128 t[5];
+
+	t[0] = input[0];
+	t[1] = input[1];
+	t[2] = input[2];
+	t[3] = input[3];
+	t[4] = input[4];
+
+	t[1] += t[0] >> 51; t[0] &= 0x7ffffffffffffUL;
+	t[2] += t[1] >> 51; t[1] &= 0x7ffffffffffffUL;
+	t[3] += t[2] >> 51; t[2] &= 0x7ffffffffffffUL;
+	t[4] += t[3] >> 51; t[3] &= 0x7ffffffffffffUL;
+	t[0] += 19 * (t[4] >> 51); t[4] &= 0x7ffffffffffffUL;
+
+	t[1] += t[0] >> 51; t[0] &= 0x7ffffffffffffUL;
+	t[2] += t[1] >> 51; t[1] &= 0x7ffffffffffffUL;
+	t[3] += t[2] >> 51; t[2] &= 0x7ffffffffffffUL;
+	t[4] += t[3] >> 51; t[3] &= 0x7ffffffffffffUL;
+	t[0] += 19 * (t[4] >> 51); t[4] &= 0x7ffffffffffffUL;
+
+	/* now t is between 0 and 2^255-1, properly carried. */
+	/* case 1: between 0 and 2^255-20. case 2: between 2^255-19 and 2^255-1. */
+
+	t[0] += 19;
+
+	t[1] += t[0] >> 51; t[0] &= 0x7ffffffffffffUL;
+	t[2] += t[1] >> 51; t[1] &= 0x7ffffffffffffUL;
+	t[3] += t[2] >> 51; t[2] &= 0x7ffffffffffffUL;
+	t[4] += t[3] >> 51; t[3] &= 0x7ffffffffffffUL;
+	t[0] += 19 * (t[4] >> 51); t[4] &= 0x7ffffffffffffUL;
+
+	/* now between 19 and 2^255-1 in both cases, and offset by 19. */
+
+	t[0] += 0x8000000000000UL - 19;
+	t[1] += 0x8000000000000UL - 1;
+	t[2] += 0x8000000000000UL - 1;
+	t[3] += 0x8000000000000UL - 1;
+	t[4] += 0x8000000000000UL - 1;
+
+	/* now between 2^255 and 2^256-20, and offset by 2^255. */
+
+	t[1] += t[0] >> 51; t[0] &= 0x7ffffffffffffUL;
+	t[2] += t[1] >> 51; t[1] &= 0x7ffffffffffffUL;
+	t[3] += t[2] >> 51; t[2] &= 0x7ffffffffffffUL;
+	t[4] += t[3] >> 51; t[3] &= 0x7ffffffffffffUL;
+	t[4] &= 0x7ffffffffffffUL;
+
+	store_limb(output,    t[0] | (t[1] << 51));
+	store_limb(output+8,  (t[1] >> 13) | (t[2] << 38));
+	store_limb(output+16, (t[2] >> 26) | (t[3] << 25));
+	store_limb(output+24, (t[3] >> 39) | (t[4] << 12));
+}
+
+/* Input: Q, Q', Q-Q'
+ * Output: 2Q, Q+Q'
+ *
+ *   x2 z3: long form
+ *   x3 z3: long form
+ *   x z: short form, destroyed
+ *   xprime zprime: short form, destroyed
+ *   qmqp: short form, preserved
+ */
+static void fmonty(limb *x2, limb *z2, /* output 2Q */
+			 limb *x3, limb *z3, /* output Q + Q' */
+			 limb *x, limb *z,   /* input Q */
+			 limb *xprime, limb *zprime, /* input Q' */
+
+			 const limb *qmqp /* input Q - Q' */)
+{
+	limb origx[5], origxprime[5], zzz[5], xx[5], zz[5], xxprime[5], zzprime[5], zzzprime[5];
+
+	memcpy(origx, x, 5 * sizeof(limb));
+	fsum(x, z);
+	fdifference_backwards(z, origx);  // does x - z
+
+	memcpy(origxprime, xprime, sizeof(limb) * 5);
+	fsum(xprime, zprime);
+	fdifference_backwards(zprime, origxprime);
+	fmul(xxprime, xprime, z);
+	fmul(zzprime, x, zprime);
+	memcpy(origxprime, xxprime, sizeof(limb) * 5);
+	fsum(xxprime, zzprime);
+	fdifference_backwards(zzprime, origxprime);
+	fsquare_times(x3, xxprime, 1);
+	fsquare_times(zzzprime, zzprime, 1);
+	fmul(z3, zzzprime, qmqp);
+
+	fsquare_times(xx, x, 1);
+	fsquare_times(zz, z, 1);
+	fmul(x2, xx, zz);
+	fdifference_backwards(zz, xx);  // does zz = xx - zz
+	fscalar_product(zzz, zz, 121665);
+	fsum(zzz, xx);
+	fmul(z2, zz, zzz);
+}
+
+/* Maybe swap the contents of two limb arrays (@a and @b), each @len elements
+ * long. Perform the swap iff @swap is non-zero.
+ *
+ * This function performs the swap without leaking any side-channel
+ * information.
+ */
+static void swap_conditional(limb a[5], limb b[5], limb iswap)
+{
+	unsigned int i;
+	const limb swap = -iswap;
+
+	for (i = 0; i < 5; ++i) {
+		const limb x = swap & (a[i] ^ b[i]);
+
+		a[i] ^= x;
+		b[i] ^= x;
+	}
+}
+
+/* Calculates nQ where Q is the x-coordinate of a point on the curve
+ *
+ *   resultx/resultz: the x coordinate of the resulting curve point (short form)
+ *   n: a little endian, 32-byte number
+ *   q: a point of the curve (short form)
+ */
+static void cmult(limb *resultx, limb *resultz, const u8 *n, const limb *q)
+{
+	limb a[5] = {0}, b[5] = {1}, c[5] = {1}, d[5] = {0};
+	limb *nqpqx = a, *nqpqz = b, *nqx = c, *nqz = d, *t;
+	limb e[5] = {0}, f[5] = {1}, g[5] = {0}, h[5] = {1};
+	limb *nqpqx2 = e, *nqpqz2 = f, *nqx2 = g, *nqz2 = h;
+
+	unsigned int i, j;
+
+	memcpy(nqpqx, q, sizeof(limb) * 5);
+
+	for (i = 0; i < 32; ++i) {
+		u8 byte = n[31 - i];
+
+		for (j = 0; j < 8; ++j) {
+			const limb bit = byte >> 7;
+
+			swap_conditional(nqx, nqpqx, bit);
+			swap_conditional(nqz, nqpqz, bit);
+			fmonty(nqx2, nqz2,
+						 nqpqx2, nqpqz2,
+						 nqx, nqz,
+						 nqpqx, nqpqz,
+						 q);
+			swap_conditional(nqx2, nqpqx2, bit);
+			swap_conditional(nqz2, nqpqz2, bit);
+
+			t = nqx;
+			nqx = nqx2;
+			nqx2 = t;
+			t = nqz;
+			nqz = nqz2;
+			nqz2 = t;
+			t = nqpqx;
+			nqpqx = nqpqx2;
+			nqpqx2 = t;
+			t = nqpqz;
+			nqpqz = nqpqz2;
+			nqpqz2 = t;
+
+			byte <<= 1;
+		}
+	}
+
+	memcpy(resultx, nqx, sizeof(limb) * 5);
+	memcpy(resultz, nqz, sizeof(limb) * 5);
+}
+
+static void crecip(felem out, const felem z)
+{
+	felem a, t0, b, c;
+
+	/* 2 */ fsquare_times(a, z, 1); // a = 2
+	/* 8 */ fsquare_times(t0, a, 2);
+	/* 9 */ fmul(b, t0, z); // b = 9
+	/* 11 */ fmul(a, b, a); // a = 11
+	/* 22 */ fsquare_times(t0, a, 1);
+	/* 2^5 - 2^0 = 31 */ fmul(b, t0, b);
+	/* 2^10 - 2^5 */ fsquare_times(t0, b, 5);
+	/* 2^10 - 2^0 */ fmul(b, t0, b);
+	/* 2^20 - 2^10 */ fsquare_times(t0, b, 10);
+	/* 2^20 - 2^0 */ fmul(c, t0, b);
+	/* 2^40 - 2^20 */ fsquare_times(t0, c, 20);
+	/* 2^40 - 2^0 */ fmul(t0, t0, c);
+	/* 2^50 - 2^10 */ fsquare_times(t0, t0, 10);
+	/* 2^50 - 2^0 */ fmul(b, t0, b);
+	/* 2^100 - 2^50 */ fsquare_times(t0, b, 50);
+	/* 2^100 - 2^0 */ fmul(c, t0, b);
+	/* 2^200 - 2^100 */ fsquare_times(t0, c, 100);
+	/* 2^200 - 2^0 */ fmul(t0, t0, c);
+	/* 2^250 - 2^50 */ fsquare_times(t0, t0, 50);
+	/* 2^250 - 2^0 */ fmul(t0, t0, b);
+	/* 2^255 - 2^5 */ fsquare_times(t0, t0, 5);
+	/* 2^255 - 21 */ fmul(out, t0, a);
+}
+
+static bool curve25519_donna(u8 mypublic[CURVE25519_POINT_SIZE], const u8 secret[CURVE25519_POINT_SIZE], const u8 basepoint[CURVE25519_POINT_SIZE])
+{
+	limb bp[5], x[5], z[5], zmone[5];
+	u8 e[32];
+
+	memcpy(e, secret, 32);
+	normalize_secret(e);
+
+	fexpand(bp, basepoint);
+	cmult(x, z, e, bp);
+	crecip(zmone, z);
+	fmul(z, x, zmone);
+	fcontract(mypublic, z);
+
+	memzero_explicit(e, sizeof(e));
+	memzero_explicit(bp, sizeof(bp));
+	memzero_explicit(x, sizeof(x));
+	memzero_explicit(z, sizeof(z));
+	memzero_explicit(zmone, sizeof(zmone));
+
+	return true;
+}
+#else
+typedef s64 limb;
+
+/* Field element representation:
+ *
+ * Field elements are written as an array of signed, 64-bit limbs, least
+ * significant first. The value of the field element is:
+ *   x[0] + 2^26·x[1] + x^51·x[2] + 2^102·x[3] + ...
+ *
+ * i.e. the limbs are 26, 25, 26, 25, ... bits wide.
+ */
+
+/* Sum two numbers: output += in */
+static void fsum(limb *output, const limb *in)
+{
+	unsigned int i;
+
+	for (i = 0; i < 10; i += 2) {
+		output[0 + i] = output[0 + i] + in[0 + i];
+		output[1 + i] = output[1 + i] + in[1 + i];
+	}
+}
+
+/* Find the difference of two numbers: output = in - output
+ * (note the order of the arguments!).
+ */
+static void fdifference(limb *output, const limb *in)
+{
+	unsigned int i;
+
+	for (i = 0; i < 10; ++i)
+		output[i] = in[i] - output[i];
+}
+
+/* Multiply a number by a scalar: output = in * scalar */
+static void fscalar_product(limb *output, const limb *in, const limb scalar)
+{
+	unsigned int i;
+
+	for (i = 0; i < 10; ++i)
+		output[i] = in[i] * scalar;
+}
+
+/* Multiply two numbers: output = in2 * in
+ *
+ * output must be distinct to both inputs. The inputs are reduced coefficient
+ * form, the output is not.
+ *
+ * output[x] <= 14 * the largest product of the input limbs.
+ */
+static void fproduct(limb *output, const limb *in2, const limb *in)
+{
+	output[0] =       ((limb) ((s32) in2[0])) * ((s32) in[0]);
+	output[1] =       ((limb) ((s32) in2[0])) * ((s32) in[1]) +
+					    ((limb) ((s32) in2[1])) * ((s32) in[0]);
+	output[2] =  2 *  ((limb) ((s32) in2[1])) * ((s32) in[1]) +
+					    ((limb) ((s32) in2[0])) * ((s32) in[2]) +
+					    ((limb) ((s32) in2[2])) * ((s32) in[0]);
+	output[3] =       ((limb) ((s32) in2[1])) * ((s32) in[2]) +
+					    ((limb) ((s32) in2[2])) * ((s32) in[1]) +
+					    ((limb) ((s32) in2[0])) * ((s32) in[3]) +
+					    ((limb) ((s32) in2[3])) * ((s32) in[0]);
+	output[4] =       ((limb) ((s32) in2[2])) * ((s32) in[2]) +
+				       2 * (((limb) ((s32) in2[1])) * ((s32) in[3]) +
+					    ((limb) ((s32) in2[3])) * ((s32) in[1])) +
+					    ((limb) ((s32) in2[0])) * ((s32) in[4]) +
+					    ((limb) ((s32) in2[4])) * ((s32) in[0]);
+	output[5] =       ((limb) ((s32) in2[2])) * ((s32) in[3]) +
+					    ((limb) ((s32) in2[3])) * ((s32) in[2]) +
+					    ((limb) ((s32) in2[1])) * ((s32) in[4]) +
+					    ((limb) ((s32) in2[4])) * ((s32) in[1]) +
+					    ((limb) ((s32) in2[0])) * ((s32) in[5]) +
+					    ((limb) ((s32) in2[5])) * ((s32) in[0]);
+	output[6] =  2 * (((limb) ((s32) in2[3])) * ((s32) in[3]) +
+					    ((limb) ((s32) in2[1])) * ((s32) in[5]) +
+					    ((limb) ((s32) in2[5])) * ((s32) in[1])) +
+					    ((limb) ((s32) in2[2])) * ((s32) in[4]) +
+					    ((limb) ((s32) in2[4])) * ((s32) in[2]) +
+					    ((limb) ((s32) in2[0])) * ((s32) in[6]) +
+					    ((limb) ((s32) in2[6])) * ((s32) in[0]);
+	output[7] =       ((limb) ((s32) in2[3])) * ((s32) in[4]) +
+					    ((limb) ((s32) in2[4])) * ((s32) in[3]) +
+					    ((limb) ((s32) in2[2])) * ((s32) in[5]) +
+					    ((limb) ((s32) in2[5])) * ((s32) in[2]) +
+					    ((limb) ((s32) in2[1])) * ((s32) in[6]) +
+					    ((limb) ((s32) in2[6])) * ((s32) in[1]) +
+					    ((limb) ((s32) in2[0])) * ((s32) in[7]) +
+					    ((limb) ((s32) in2[7])) * ((s32) in[0]);
+	output[8] =       ((limb) ((s32) in2[4])) * ((s32) in[4]) +
+				       2 * (((limb) ((s32) in2[3])) * ((s32) in[5]) +
+					    ((limb) ((s32) in2[5])) * ((s32) in[3]) +
+					    ((limb) ((s32) in2[1])) * ((s32) in[7]) +
+					    ((limb) ((s32) in2[7])) * ((s32) in[1])) +
+					    ((limb) ((s32) in2[2])) * ((s32) in[6]) +
+					    ((limb) ((s32) in2[6])) * ((s32) in[2]) +
+					    ((limb) ((s32) in2[0])) * ((s32) in[8]) +
+					    ((limb) ((s32) in2[8])) * ((s32) in[0]);
+	output[9] =       ((limb) ((s32) in2[4])) * ((s32) in[5]) +
+					    ((limb) ((s32) in2[5])) * ((s32) in[4]) +
+					    ((limb) ((s32) in2[3])) * ((s32) in[6]) +
+					    ((limb) ((s32) in2[6])) * ((s32) in[3]) +
+					    ((limb) ((s32) in2[2])) * ((s32) in[7]) +
+					    ((limb) ((s32) in2[7])) * ((s32) in[2]) +
+					    ((limb) ((s32) in2[1])) * ((s32) in[8]) +
+					    ((limb) ((s32) in2[8])) * ((s32) in[1]) +
+					    ((limb) ((s32) in2[0])) * ((s32) in[9]) +
+					    ((limb) ((s32) in2[9])) * ((s32) in[0]);
+	output[10] = 2 * (((limb) ((s32) in2[5])) * ((s32) in[5]) +
+					    ((limb) ((s32) in2[3])) * ((s32) in[7]) +
+					    ((limb) ((s32) in2[7])) * ((s32) in[3]) +
+					    ((limb) ((s32) in2[1])) * ((s32) in[9]) +
+					    ((limb) ((s32) in2[9])) * ((s32) in[1])) +
+					    ((limb) ((s32) in2[4])) * ((s32) in[6]) +
+					    ((limb) ((s32) in2[6])) * ((s32) in[4]) +
+					    ((limb) ((s32) in2[2])) * ((s32) in[8]) +
+					    ((limb) ((s32) in2[8])) * ((s32) in[2]);
+	output[11] =      ((limb) ((s32) in2[5])) * ((s32) in[6]) +
+					    ((limb) ((s32) in2[6])) * ((s32) in[5]) +
+					    ((limb) ((s32) in2[4])) * ((s32) in[7]) +
+					    ((limb) ((s32) in2[7])) * ((s32) in[4]) +
+					    ((limb) ((s32) in2[3])) * ((s32) in[8]) +
+					    ((limb) ((s32) in2[8])) * ((s32) in[3]) +
+					    ((limb) ((s32) in2[2])) * ((s32) in[9]) +
+					    ((limb) ((s32) in2[9])) * ((s32) in[2]);
+	output[12] =      ((limb) ((s32) in2[6])) * ((s32) in[6]) +
+				       2 * (((limb) ((s32) in2[5])) * ((s32) in[7]) +
+					    ((limb) ((s32) in2[7])) * ((s32) in[5]) +
+					    ((limb) ((s32) in2[3])) * ((s32) in[9]) +
+					    ((limb) ((s32) in2[9])) * ((s32) in[3])) +
+					    ((limb) ((s32) in2[4])) * ((s32) in[8]) +
+					    ((limb) ((s32) in2[8])) * ((s32) in[4]);
+	output[13] =      ((limb) ((s32) in2[6])) * ((s32) in[7]) +
+					    ((limb) ((s32) in2[7])) * ((s32) in[6]) +
+					    ((limb) ((s32) in2[5])) * ((s32) in[8]) +
+					    ((limb) ((s32) in2[8])) * ((s32) in[5]) +
+					    ((limb) ((s32) in2[4])) * ((s32) in[9]) +
+					    ((limb) ((s32) in2[9])) * ((s32) in[4]);
+	output[14] = 2 * (((limb) ((s32) in2[7])) * ((s32) in[7]) +
+					    ((limb) ((s32) in2[5])) * ((s32) in[9]) +
+					    ((limb) ((s32) in2[9])) * ((s32) in[5])) +
+					    ((limb) ((s32) in2[6])) * ((s32) in[8]) +
+					    ((limb) ((s32) in2[8])) * ((s32) in[6]);
+	output[15] =      ((limb) ((s32) in2[7])) * ((s32) in[8]) +
+					    ((limb) ((s32) in2[8])) * ((s32) in[7]) +
+					    ((limb) ((s32) in2[6])) * ((s32) in[9]) +
+					    ((limb) ((s32) in2[9])) * ((s32) in[6]);
+	output[16] =      ((limb) ((s32) in2[8])) * ((s32) in[8]) +
+				       2 * (((limb) ((s32) in2[7])) * ((s32) in[9]) +
+					    ((limb) ((s32) in2[9])) * ((s32) in[7]));
+	output[17] =      ((limb) ((s32) in2[8])) * ((s32) in[9]) +
+					    ((limb) ((s32) in2[9])) * ((s32) in[8]);
+	output[18] = 2 *  ((limb) ((s32) in2[9])) * ((s32) in[9]);
+}
+
+/* Reduce a long form to a short form by taking the input mod 2^255 - 19.
+ *
+ * On entry: |output[i]| < 14*2^54
+ * On exit: |output[0..8]| < 280*2^54
+ */
+static void freduce_degree(limb *output)
+{
+	/* Each of these shifts and adds ends up multiplying the value by 19.
+	 *
+	 * For output[0..8], the absolute entry value is < 14*2^54 and we add, at
+	 * most, 19*14*2^54 thus, on exit, |output[0..8]| < 280*2^54.
+	 */
+	output[8] += output[18] << 4;
+	output[8] += output[18] << 1;
+	output[8] += output[18];
+	output[7] += output[17] << 4;
+	output[7] += output[17] << 1;
+	output[7] += output[17];
+	output[6] += output[16] << 4;
+	output[6] += output[16] << 1;
+	output[6] += output[16];
+	output[5] += output[15] << 4;
+	output[5] += output[15] << 1;
+	output[5] += output[15];
+	output[4] += output[14] << 4;
+	output[4] += output[14] << 1;
+	output[4] += output[14];
+	output[3] += output[13] << 4;
+	output[3] += output[13] << 1;
+	output[3] += output[13];
+	output[2] += output[12] << 4;
+	output[2] += output[12] << 1;
+	output[2] += output[12];
+	output[1] += output[11] << 4;
+	output[1] += output[11] << 1;
+	output[1] += output[11];
+	output[0] += output[10] << 4;
+	output[0] += output[10] << 1;
+	output[0] += output[10];
+}
+
+#if (-1 & 3) != 3
+#error "This code only works on a two's complement system"
+#endif
+
+/* return v / 2^26, using only shifts and adds.
+ *
+ * On entry: v can take any value.
+ */
+static inline limb div_by_2_26(const limb v)
+{
+	/* High word of v; no shift needed. */
+	const u32 highword = (u32) (((u64) v) >> 32);
+	/* Set to all 1s if v was negative; else set to 0s. */
+	const s32 sign = ((s32) highword) >> 31;
+	/* Set to 0x3ffffff if v was negative; else set to 0. */
+	const s32 roundoff = ((u32) sign) >> 6;
+	/* Should return v / (1<<26) */
+	return (v + roundoff) >> 26;
+}
+
+/* return v / (2^25), using only shifts and adds.
+ *
+ * On entry: v can take any value.
+ */
+static inline limb div_by_2_25(const limb v)
+{
+	/* High word of v; no shift needed*/
+	const u32 highword = (u32) (((u64) v) >> 32);
+	/* Set to all 1s if v was negative; else set to 0s. */
+	const s32 sign = ((s32) highword) >> 31;
+	/* Set to 0x1ffffff if v was negative; else set to 0. */
+	const s32 roundoff = ((u32) sign) >> 7;
+	/* Should return v / (1<<25) */
+	return (v + roundoff) >> 25;
+}
+
+/* Reduce all coefficients of the short form input so that |x| < 2^26.
+ *
+ * On entry: |output[i]| < 280*2^54
+ */
+static void freduce_coefficients(limb *output)
+{
+	unsigned int i;
+
+	output[10] = 0;
+
+	for (i = 0; i < 10; i += 2) {
+		limb over = div_by_2_26(output[i]);
+		/* The entry condition (that |output[i]| < 280*2^54) means that over is, at
+		 * most, 280*2^28 in the first iteration of this loop. This is added to the
+		 * next limb and we can approximate the resulting bound of that limb by
+		 * 281*2^54.
+		 */
+		output[i] -= over << 26;
+		output[i+1] += over;
+
+		/* For the first iteration, |output[i+1]| < 281*2^54, thus |over| <
+		 * 281*2^29. When this is added to the next limb, the resulting bound can
+		 * be approximated as 281*2^54.
+		 *
+		 * For subsequent iterations of the loop, 281*2^54 remains a conservative
+		 * bound and no overflow occurs.
+		 */
+		over = div_by_2_25(output[i+1]);
+		output[i+1] -= over << 25;
+		output[i+2] += over;
+	}
+	/* Now |output[10]| < 281*2^29 and all other coefficients are reduced. */
+	output[0] += output[10] << 4;
+	output[0] += output[10] << 1;
+	output[0] += output[10];
+
+	output[10] = 0;
+
+	/* Now output[1..9] are reduced, and |output[0]| < 2^26 + 19*281*2^29
+	 * So |over| will be no more than 2^16.
+	 */
+	{
+		limb over = div_by_2_26(output[0]);
+
+		output[0] -= over << 26;
+		output[1] += over;
+	}
+
+	/* Now output[0,2..9] are reduced, and |output[1]| < 2^25 + 2^16 < 2^26. The
+	 * bound on |output[1]| is sufficient to meet our needs.
+	 */
+}
+
+/* A helpful wrapper around fproduct: output = in * in2.
+ *
+ * On entry: |in[i]| < 2^27 and |in2[i]| < 2^27.
+ *
+ * output must be distinct to both inputs. The output is reduced degree
+ * (indeed, one need only provide storage for 10 limbs) and |output[i]| < 2^26.
+ */
+static void fmul(limb *output, const limb *in, const limb *in2)
+{
+	limb t[19];
+
+	fproduct(t, in, in2);
+	/* |t[i]| < 14*2^54 */
+	freduce_degree(t);
+	freduce_coefficients(t);
+	/* |t[i]| < 2^26 */
+	memcpy(output, t, sizeof(limb) * 10);
+}
+
+/* Square a number: output = in**2
+ *
+ * output must be distinct from the input. The inputs are reduced coefficient
+ * form, the output is not.
+ *
+ * output[x] <= 14 * the largest product of the input limbs.
+ */
+static void fsquare_inner(limb *output, const limb *in)
+{
+	output[0] =       ((limb) ((s32) in[0])) * ((s32) in[0]);
+	output[1] =  2 *  ((limb) ((s32) in[0])) * ((s32) in[1]);
+	output[2] =  2 * (((limb) ((s32) in[1])) * ((s32) in[1]) +
+					    ((limb) ((s32) in[0])) * ((s32) in[2]));
+	output[3] =  2 * (((limb) ((s32) in[1])) * ((s32) in[2]) +
+					    ((limb) ((s32) in[0])) * ((s32) in[3]));
+	output[4] =       ((limb) ((s32) in[2])) * ((s32) in[2]) +
+				       4 *  ((limb) ((s32) in[1])) * ((s32) in[3]) +
+				       2 *  ((limb) ((s32) in[0])) * ((s32) in[4]);
+	output[5] =  2 * (((limb) ((s32) in[2])) * ((s32) in[3]) +
+					    ((limb) ((s32) in[1])) * ((s32) in[4]) +
+					    ((limb) ((s32) in[0])) * ((s32) in[5]));
+	output[6] =  2 * (((limb) ((s32) in[3])) * ((s32) in[3]) +
+					    ((limb) ((s32) in[2])) * ((s32) in[4]) +
+					    ((limb) ((s32) in[0])) * ((s32) in[6]) +
+				       2 *  ((limb) ((s32) in[1])) * ((s32) in[5]));
+	output[7] =  2 * (((limb) ((s32) in[3])) * ((s32) in[4]) +
+					    ((limb) ((s32) in[2])) * ((s32) in[5]) +
+					    ((limb) ((s32) in[1])) * ((s32) in[6]) +
+					    ((limb) ((s32) in[0])) * ((s32) in[7]));
+	output[8] =       ((limb) ((s32) in[4])) * ((s32) in[4]) +
+				       2 * (((limb) ((s32) in[2])) * ((s32) in[6]) +
+					    ((limb) ((s32) in[0])) * ((s32) in[8]) +
+				       2 * (((limb) ((s32) in[1])) * ((s32) in[7]) +
+					    ((limb) ((s32) in[3])) * ((s32) in[5])));
+	output[9] =  2 * (((limb) ((s32) in[4])) * ((s32) in[5]) +
+					    ((limb) ((s32) in[3])) * ((s32) in[6]) +
+					    ((limb) ((s32) in[2])) * ((s32) in[7]) +
+					    ((limb) ((s32) in[1])) * ((s32) in[8]) +
+					    ((limb) ((s32) in[0])) * ((s32) in[9]));
+	output[10] = 2 * (((limb) ((s32) in[5])) * ((s32) in[5]) +
+					    ((limb) ((s32) in[4])) * ((s32) in[6]) +
+					    ((limb) ((s32) in[2])) * ((s32) in[8]) +
+				       2 * (((limb) ((s32) in[3])) * ((s32) in[7]) +
+					    ((limb) ((s32) in[1])) * ((s32) in[9])));
+	output[11] = 2 * (((limb) ((s32) in[5])) * ((s32) in[6]) +
+					    ((limb) ((s32) in[4])) * ((s32) in[7]) +
+					    ((limb) ((s32) in[3])) * ((s32) in[8]) +
+					    ((limb) ((s32) in[2])) * ((s32) in[9]));
+	output[12] =      ((limb) ((s32) in[6])) * ((s32) in[6]) +
+				       2 * (((limb) ((s32) in[4])) * ((s32) in[8]) +
+				       2 * (((limb) ((s32) in[5])) * ((s32) in[7]) +
+					    ((limb) ((s32) in[3])) * ((s32) in[9])));
+	output[13] = 2 * (((limb) ((s32) in[6])) * ((s32) in[7]) +
+					    ((limb) ((s32) in[5])) * ((s32) in[8]) +
+					    ((limb) ((s32) in[4])) * ((s32) in[9]));
+	output[14] = 2 * (((limb) ((s32) in[7])) * ((s32) in[7]) +
+					    ((limb) ((s32) in[6])) * ((s32) in[8]) +
+				       2 *  ((limb) ((s32) in[5])) * ((s32) in[9]));
+	output[15] = 2 * (((limb) ((s32) in[7])) * ((s32) in[8]) +
+					    ((limb) ((s32) in[6])) * ((s32) in[9]));
+	output[16] =      ((limb) ((s32) in[8])) * ((s32) in[8]) +
+				       4 *  ((limb) ((s32) in[7])) * ((s32) in[9]);
+	output[17] = 2 *  ((limb) ((s32) in[8])) * ((s32) in[9]);
+	output[18] = 2 *  ((limb) ((s32) in[9])) * ((s32) in[9]);
+}
+
+/* fsquare sets output = in^2.
+ *
+ * On entry: The |in| argument is in reduced coefficients form and |in[i]| <
+ * 2^27.
+ *
+ * On exit: The |output| argument is in reduced coefficients form (indeed, one
+ * need only provide storage for 10 limbs) and |out[i]| < 2^26.
+ */
+static void fsquare(limb *output, const limb *in)
+{
+	limb t[19];
+
+	fsquare_inner(t, in);
+	/* |t[i]| < 14*2^54 because the largest product of two limbs will be <
+	 * 2^(27+27) and fsquare_inner adds together, at most, 14 of those
+	 * products.
+	 */
+	freduce_degree(t);
+	freduce_coefficients(t);
+	/* |t[i]| < 2^26 */
+	memcpy(output, t, sizeof(limb) * 10);
+}
+
+/* Take a little-endian, 32-byte number and expand it into polynomial form */
+static inline void fexpand(limb *output, const u8 *input)
+{
+#define F(n, start, shift, mask) \
+	output[n] = ((((limb) input[start + 0]) | \
+		      ((limb) input[start + 1]) << 8 | \
+		      ((limb) input[start + 2]) << 16 | \
+		      ((limb) input[start + 3]) << 24) >> shift) & mask;
+	F(0, 0, 0, 0x3ffffff);
+	F(1, 3, 2, 0x1ffffff);
+	F(2, 6, 3, 0x3ffffff);
+	F(3, 9, 5, 0x1ffffff);
+	F(4, 12, 6, 0x3ffffff);
+	F(5, 16, 0, 0x1ffffff);
+	F(6, 19, 1, 0x3ffffff);
+	F(7, 22, 3, 0x1ffffff);
+	F(8, 25, 4, 0x3ffffff);
+	F(9, 28, 6, 0x1ffffff);
+#undef F
+}
+
+#if (-32 >> 1) != -16
+#error "This code only works when >> does sign-extension on negative numbers"
+#endif
+
+/* s32_eq returns 0xffffffff iff a == b and zero otherwise. */
+static s32 s32_eq(s32 a, s32 b)
+{
+	a = ~(a ^ b);
+	a &= a << 16;
+	a &= a << 8;
+	a &= a << 4;
+	a &= a << 2;
+	a &= a << 1;
+	return a >> 31;
+}
+
+/* s32_gte returns 0xffffffff if a >= b and zero otherwise, where a and b are
+ * both non-negative.
+ */
+static s32 s32_gte(s32 a, s32 b)
+{
+	a -= b;
+	/* a >= 0 iff a >= b. */
+	return ~(a >> 31);
+}
+
+/* Take a fully reduced polynomial form number and contract it into a
+ * little-endian, 32-byte array.
+ *
+ * On entry: |input_limbs[i]| < 2^26
+ */
+static void fcontract(u8 *output, limb *input_limbs)
+{
+	int i;
+	int j;
+	s32 input[10];
+	s32 mask;
+
+	/* |input_limbs[i]| < 2^26, so it's valid to convert to an s32. */
+	for (i = 0; i < 10; i++) {
+		input[i] = input_limbs[i];
+	}
+
+	for (j = 0; j < 2; ++j) {
+		for (i = 0; i < 9; ++i) {
+			if ((i & 1) == 1) {
+				/* This calculation is a time-invariant way to make input[i]
+				 * non-negative by borrowing from the next-larger limb.
+				 */
+				const s32 mask = input[i] >> 31;
+				const s32 carry = -((input[i] & mask) >> 25);
+
+				input[i] = input[i] + (carry << 25);
+				input[i+1] = input[i+1] - carry;
+			} else {
+				const s32 mask = input[i] >> 31;
+				const s32 carry = -((input[i] & mask) >> 26);
+
+				input[i] = input[i] + (carry << 26);
+				input[i+1] = input[i+1] - carry;
+			}
+		}
+
+		/* There's no greater limb for input[9] to borrow from, but we can multiply
+		 * by 19 and borrow from input[0], which is valid mod 2^255-19.
+		 */
+		{
+			const s32 mask = input[9] >> 31;
+			const s32 carry = -((input[9] & mask) >> 25);
+
+			input[9] = input[9] + (carry << 25);
+			input[0] = input[0] - (carry * 19);
+		}
+
+		/* After the first iteration, input[1..9] are non-negative and fit within
+		 * 25 or 26 bits, depending on position. However, input[0] may be
+		 * negative.
+		 */
+	}
+
+	/* The first borrow-propagation pass above ended with every limb
+	   except (possibly) input[0] non-negative.
+	   If input[0] was negative after the first pass, then it was because of a
+	   carry from input[9]. On entry, input[9] < 2^26 so the carry was, at most,
+	   one, since (2**26-1) >> 25 = 1. Thus input[0] >= -19.
+	   In the second pass, each limb is decreased by at most one. Thus the second
+	   borrow-propagation pass could only have wrapped around to decrease
+	   input[0] again if the first pass left input[0] negative *and* input[1]
+	   through input[9] were all zero.  In that case, input[1] is now 2^25 - 1,
+	   and this last borrow-propagation step will leave input[1] non-negative. */
+	{
+		const s32 mask = input[0] >> 31;
+		const s32 carry = -((input[0] & mask) >> 26);
+
+		input[0] = input[0] + (carry << 26);
+		input[1] = input[1] - carry;
+	}
+
+	/* All input[i] are now non-negative. However, there might be values between
+	 * 2^25 and 2^26 in a limb which is, nominally, 25 bits wide.
+	 */
+	for (j = 0; j < 2; j++) {
+		for (i = 0; i < 9; i++) {
+			if ((i & 1) == 1) {
+				const s32 carry = input[i] >> 25;
+
+				input[i] &= 0x1ffffff;
+				input[i+1] += carry;
+			} else {
+				const s32 carry = input[i] >> 26;
+
+				input[i] &= 0x3ffffff;
+				input[i+1] += carry;
+			}
+		}
+
+		{
+			const s32 carry = input[9] >> 25;
+
+			input[9] &= 0x1ffffff;
+			input[0] += 19*carry;
+		}
+	}
+
+	/* If the first carry-chain pass, just above, ended up with a carry from
+	 * input[9], and that caused input[0] to be out-of-bounds, then input[0] was
+	 * < 2^26 + 2*19, because the carry was, at most, two.
+	 *
+	 * If the second pass carried from input[9] again then input[0] is < 2*19 and
+	 * the input[9] -> input[0] carry didn't push input[0] out of bounds.
+	 */
+
+	/* It still remains the case that input might be between 2^255-19 and 2^255.
+	 * In this case, input[1..9] must take their maximum value and input[0] must
+	 * be >= (2^255-19) & 0x3ffffff, which is 0x3ffffed.
+	 */
+	mask = s32_gte(input[0], 0x3ffffed);
+	for (i = 1; i < 10; i++) {
+		if ((i & 1) == 1) {
+			mask &= s32_eq(input[i], 0x1ffffff);
+		} else {
+			mask &= s32_eq(input[i], 0x3ffffff);
+		}
+	}
+
+	/* mask is either 0xffffffff (if input >= 2^255-19) and zero otherwise. Thus
+	 * this conditionally subtracts 2^255-19.
+	 */
+	input[0] -= mask & 0x3ffffed;
+
+	for (i = 1; i < 10; i++) {
+		if ((i & 1) == 1) {
+			input[i] -= mask & 0x1ffffff;
+		} else {
+			input[i] -= mask & 0x3ffffff;
+		}
+	}
+
+	input[1] <<= 2;
+	input[2] <<= 3;
+	input[3] <<= 5;
+	input[4] <<= 6;
+	input[6] <<= 1;
+	input[7] <<= 3;
+	input[8] <<= 4;
+	input[9] <<= 6;
+#define F(i, s) \
+	output[s+0] |=  input[i] & 0xff; \
+	output[s+1]  = (input[i] >> 8) & 0xff; \
+	output[s+2]  = (input[i] >> 16) & 0xff; \
+	output[s+3]  = (input[i] >> 24) & 0xff;
+	output[0] = 0;
+	output[16] = 0;
+	F(0, 0);
+	F(1, 3);
+	F(2, 6);
+	F(3, 9);
+	F(4, 12);
+	F(5, 16);
+	F(6, 19);
+	F(7, 22);
+	F(8, 25);
+	F(9, 28);
+#undef F
+}
+
+/* Conditionally swap two reduced-form limb arrays if 'iswap' is 1, but leave
+ * them unchanged if 'iswap' is 0.  Runs in data-invariant time to avoid
+ * side-channel attacks.
+ *
+ * NOTE that this function requires that 'iswap' be 1 or 0; other values give
+ * wrong results.  Also, the two limb arrays must be in reduced-coefficient,
+ * reduced-degree form: the values in a[10..19] or b[10..19] aren't swapped,
+ * and all all values in a[0..9],b[0..9] must have magnitude less than
+ * INT32_MAX.
+ */
+static void swap_conditional(limb a[19], limb b[19], limb iswap)
+{
+	unsigned int i;
+	const s32 swap = (s32) -iswap;
+
+	for (i = 0; i < 10; ++i) {
+		const s32 x = swap & (((s32)a[i]) ^ ((s32)b[i]));
+
+		a[i] = ((s32)a[i]) ^ x;
+		b[i] = ((s32)b[i]) ^ x;
+	}
+}
+
+static void crecip(limb *out, const limb *z)
+{
+	limb z2[10];
+	limb z9[10];
+	limb z11[10];
+	limb z2_5_0[10];
+	limb z2_10_0[10];
+	limb z2_20_0[10];
+	limb z2_50_0[10];
+	limb z2_100_0[10];
+	limb t0[10];
+	limb t1[10];
+	int i;
+
+	/* 2 */ fsquare(z2, z);
+	/* 4 */ fsquare(t1, z2);
+	/* 8 */ fsquare(t0, t1);
+	/* 9 */ fmul(z9, t0, z);
+	/* 11 */ fmul(z11, z9, z2);
+	/* 22 */ fsquare(t0, z11);
+	/* 2^5 - 2^0 = 31 */ fmul(z2_5_0, t0, z9);
+
+	/* 2^6 - 2^1 */ fsquare(t0, z2_5_0);
+	/* 2^7 - 2^2 */ fsquare(t1, t0);
+	/* 2^8 - 2^3 */ fsquare(t0, t1);
+	/* 2^9 - 2^4 */ fsquare(t1, t0);
+	/* 2^10 - 2^5 */ fsquare(t0, t1);
+	/* 2^10 - 2^0 */ fmul(z2_10_0, t0, z2_5_0);
+
+	/* 2^11 - 2^1 */ fsquare(t0, z2_10_0);
+	/* 2^12 - 2^2 */ fsquare(t1, t0);
+	/* 2^20 - 2^10 */ for (i = 2; i < 10; i += 2) { fsquare(t0, t1); fsquare(t1, t0); }
+	/* 2^20 - 2^0 */ fmul(z2_20_0, t1, z2_10_0);
+
+	/* 2^21 - 2^1 */ fsquare(t0, z2_20_0);
+	/* 2^22 - 2^2 */ fsquare(t1, t0);
+	/* 2^40 - 2^20 */ for (i = 2; i < 20; i += 2) { fsquare(t0, t1); fsquare(t1, t0); }
+	/* 2^40 - 2^0 */ fmul(t0, t1, z2_20_0);
+
+	/* 2^41 - 2^1 */ fsquare(t1, t0);
+	/* 2^42 - 2^2 */ fsquare(t0, t1);
+	/* 2^50 - 2^10 */ for (i = 2; i < 10; i += 2) { fsquare(t1, t0); fsquare(t0, t1); }
+	/* 2^50 - 2^0 */ fmul(z2_50_0, t0, z2_10_0);
+
+	/* 2^51 - 2^1 */ fsquare(t0, z2_50_0);
+	/* 2^52 - 2^2 */ fsquare(t1, t0);
+	/* 2^100 - 2^50 */ for (i = 2; i < 50; i += 2) { fsquare(t0, t1); fsquare(t1, t0); }
+	/* 2^100 - 2^0 */ fmul(z2_100_0, t1, z2_50_0);
+
+	/* 2^101 - 2^1 */ fsquare(t1, z2_100_0);
+	/* 2^102 - 2^2 */ fsquare(t0, t1);
+	/* 2^200 - 2^100 */ for (i = 2; i < 100; i += 2) { fsquare(t1, t0); fsquare(t0, t1); }
+	/* 2^200 - 2^0 */ fmul(t1, t0, z2_100_0);
+
+	/* 2^201 - 2^1 */ fsquare(t0, t1);
+	/* 2^202 - 2^2 */ fsquare(t1, t0);
+	/* 2^250 - 2^50 */ for (i = 2; i < 50; i += 2) { fsquare(t0, t1); fsquare(t1, t0); }
+	/* 2^250 - 2^0 */ fmul(t0, t1, z2_50_0);
+
+	/* 2^251 - 2^1 */ fsquare(t1, t0);
+	/* 2^252 - 2^2 */ fsquare(t0, t1);
+	/* 2^253 - 2^3 */ fsquare(t1, t0);
+	/* 2^254 - 2^4 */ fsquare(t0, t1);
+	/* 2^255 - 2^5 */ fsquare(t1, t0);
+	/* 2^255 - 21 */ fmul(out, t1, z11);
+}
+
+
+#ifdef ARCH_HAS_SEPARATE_IRQ_STACK
+/* Input: Q, Q', Q-Q'
+ * Output: 2Q, Q+Q'
+ *
+ *   x2 z3: long form
+ *   x3 z3: long form
+ *   x z: short form, destroyed
+ *   xprime zprime: short form, destroyed
+ *   qmqp: short form, preserved
+ *
+ * On entry and exit, the absolute value of the limbs of all inputs and outputs
+ * are < 2^26.
+ */
+static void fmonty(limb *x2, limb *z2,  /* output 2Q */
+		   limb *x3, limb *z3,  /* output Q + Q' */
+		   limb *x, limb *z,    /* input Q */
+		   limb *xprime, limb *zprime,  /* input Q' */
+
+		   const limb *qmqp /* input Q - Q' */)
+{
+	limb origx[10], origxprime[10], zzz[19], xx[19], zz[19], xxprime[19],
+				zzprime[19], zzzprime[19], xxxprime[19];
+
+	memcpy(origx, x, 10 * sizeof(limb));
+	fsum(x, z);
+	/* |x[i]| < 2^27 */
+	fdifference(z, origx);  /* does x - z */
+	/* |z[i]| < 2^27 */
+
+	memcpy(origxprime, xprime, sizeof(limb) * 10);
+	fsum(xprime, zprime);
+	/* |xprime[i]| < 2^27 */
+	fdifference(zprime, origxprime);
+	/* |zprime[i]| < 2^27 */
+	fproduct(xxprime, xprime, z);
+	/* |xxprime[i]| < 14*2^54: the largest product of two limbs will be <
+	 * 2^(27+27) and fproduct adds together, at most, 14 of those products.
+	 * (Approximating that to 2^58 doesn't work out.)
+	 */
+	fproduct(zzprime, x, zprime);
+	/* |zzprime[i]| < 14*2^54 */
+	freduce_degree(xxprime);
+	freduce_coefficients(xxprime);
+	/* |xxprime[i]| < 2^26 */
+	freduce_degree(zzprime);
+	freduce_coefficients(zzprime);
+	/* |zzprime[i]| < 2^26 */
+	memcpy(origxprime, xxprime, sizeof(limb) * 10);
+	fsum(xxprime, zzprime);
+	/* |xxprime[i]| < 2^27 */
+	fdifference(zzprime, origxprime);
+	/* |zzprime[i]| < 2^27 */
+	fsquare(xxxprime, xxprime);
+	/* |xxxprime[i]| < 2^26 */
+	fsquare(zzzprime, zzprime);
+	/* |zzzprime[i]| < 2^26 */
+	fproduct(zzprime, zzzprime, qmqp);
+	/* |zzprime[i]| < 14*2^52 */
+	freduce_degree(zzprime);
+	freduce_coefficients(zzprime);
+	/* |zzprime[i]| < 2^26 */
+	memcpy(x3, xxxprime, sizeof(limb) * 10);
+	memcpy(z3, zzprime, sizeof(limb) * 10);
+
+	fsquare(xx, x);
+	/* |xx[i]| < 2^26 */
+	fsquare(zz, z);
+	/* |zz[i]| < 2^26 */
+	fproduct(x2, xx, zz);
+	/* |x2[i]| < 14*2^52 */
+	freduce_degree(x2);
+	freduce_coefficients(x2);
+	/* |x2[i]| < 2^26 */
+	fdifference(zz, xx);  // does zz = xx - zz
+	/* |zz[i]| < 2^27 */
+	memset(zzz + 10, 0, sizeof(limb) * 9);
+	fscalar_product(zzz, zz, 121665);
+	/* |zzz[i]| < 2^(27+17) */
+	/* No need to call freduce_degree here:
+		 fscalar_product doesn't increase the degree of its input. */
+	freduce_coefficients(zzz);
+	/* |zzz[i]| < 2^26 */
+	fsum(zzz, xx);
+	/* |zzz[i]| < 2^27 */
+	fproduct(z2, zz, zzz);
+	/* |z2[i]| < 14*2^(26+27) */
+	freduce_degree(z2);
+	freduce_coefficients(z2);
+	/* |z2|i| < 2^26 */
+}
+
+/* Calculates nQ where Q is the x-coordinate of a point on the curve
+ *
+ *   resultx/resultz: the x coordinate of the resulting curve point (short form)
+ *   n: a little endian, 32-byte number
+ *   q: a point of the curve (short form)
+ */
+static void cmult(limb *resultx, limb *resultz, const u8 *n, const limb *q)
+{
+	limb a[19] = {0}, b[19] = {1}, c[19] = {1}, d[19] = {0};
+	limb *nqpqx = a, *nqpqz = b, *nqx = c, *nqz = d, *t;
+	limb e[19] = {0}, f[19] = {1}, g[19] = {0}, h[19] = {1};
+	limb *nqpqx2 = e, *nqpqz2 = f, *nqx2 = g, *nqz2 = h;
+
+	unsigned int i, j;
+
+	memcpy(nqpqx, q, sizeof(limb) * 10);
+
+	for (i = 0; i < 32; ++i) {
+		u8 byte = n[31 - i];
+
+		for (j = 0; j < 8; ++j) {
+			const limb bit = byte >> 7;
+
+			swap_conditional(nqx, nqpqx, bit);
+			swap_conditional(nqz, nqpqz, bit);
+			fmonty(nqx2, nqz2,
+			       nqpqx2, nqpqz2,
+			       nqx, nqz,
+			       nqpqx, nqpqz,
+			       q);
+			swap_conditional(nqx2, nqpqx2, bit);
+			swap_conditional(nqz2, nqpqz2, bit);
+
+			t = nqx;
+			nqx = nqx2;
+			nqx2 = t;
+			t = nqz;
+			nqz = nqz2;
+			nqz2 = t;
+			t = nqpqx;
+			nqpqx = nqpqx2;
+			nqpqx2 = t;
+			t = nqpqz;
+			nqpqz = nqpqz2;
+			nqpqz2 = t;
+
+			byte <<= 1;
+		}
+	}
+
+	memcpy(resultx, nqx, sizeof(limb) * 10);
+	memcpy(resultz, nqz, sizeof(limb) * 10);
+}
+
+static bool curve25519_donna(u8 mypublic[CURVE25519_POINT_SIZE], const u8 secret[CURVE25519_POINT_SIZE], const u8 basepoint[CURVE25519_POINT_SIZE])
+{
+	limb bp[10], x[10], z[11], zmone[10];
+	u8 e[32];
+
+	memcpy(e, secret, 32);
+	normalize_secret(e);
+
+	fexpand(bp, basepoint);
+	cmult(x, z, e, bp);
+	crecip(zmone, z);
+	fmul(z, x, zmone);
+	fcontract(mypublic, z);
+
+	memzero_explicit(e, sizeof(e));
+	memzero_explicit(bp, sizeof(bp));
+	memzero_explicit(x, sizeof(x));
+	memzero_explicit(z, sizeof(z));
+	memzero_explicit(zmone, sizeof(zmone));
+
+	return true;
+}
+#else
+struct other_stack {
+	limb origx[10], origxprime[10], zzz[19], xx[19], zz[19], xxprime[19], zzprime[19], zzzprime[19], xxxprime[19];
+	limb a[19], b[19], c[19], d[19], e[19], f[19], g[19], h[19];
+	limb bp[10], x[10], z[11], zmone[10];
+	u8 ee[32];
+};
+
+/* Input: Q, Q', Q-Q'
+ * Output: 2Q, Q+Q'
+ *
+ *   x2 z3: long form
+ *   x3 z3: long form
+ *   x z: short form, destroyed
+ *   xprime zprime: short form, destroyed
+ *   qmqp: short form, preserved
+ *
+ * On entry and exit, the absolute value of the limbs of all inputs and outputs
+ * are < 2^26.
+ */
+static void fmonty(struct other_stack *s,
+		   limb *x2, limb *z2,  /* output 2Q */
+		   limb *x3, limb *z3,  /* output Q + Q' */
+		   limb *x, limb *z,    /* input Q */
+		   limb *xprime, limb *zprime,  /* input Q' */
+
+		   const limb *qmqp /* input Q - Q' */)
+{
+	memcpy(s->origx, x, 10 * sizeof(limb));
+	fsum(x, z);
+	/* |x[i]| < 2^27 */
+	fdifference(z, s->origx);  /* does x - z */
+	/* |z[i]| < 2^27 */
+
+	memcpy(s->origxprime, xprime, sizeof(limb) * 10);
+	fsum(xprime, zprime);
+	/* |xprime[i]| < 2^27 */
+	fdifference(zprime, s->origxprime);
+	/* |zprime[i]| < 2^27 */
+	fproduct(s->xxprime, xprime, z);
+	/* |s->xxprime[i]| < 14*2^54: the largest product of two limbs will be <
+	 * 2^(27+27) and fproduct adds together, at most, 14 of those products.
+	 * (Approximating that to 2^58 doesn't work out.)
+	 */
+	fproduct(s->zzprime, x, zprime);
+	/* |s->zzprime[i]| < 14*2^54 */
+	freduce_degree(s->xxprime);
+	freduce_coefficients(s->xxprime);
+	/* |s->xxprime[i]| < 2^26 */
+	freduce_degree(s->zzprime);
+	freduce_coefficients(s->zzprime);
+	/* |s->zzprime[i]| < 2^26 */
+	memcpy(s->origxprime, s->xxprime, sizeof(limb) * 10);
+	fsum(s->xxprime, s->zzprime);
+	/* |s->xxprime[i]| < 2^27 */
+	fdifference(s->zzprime, s->origxprime);
+	/* |s->zzprime[i]| < 2^27 */
+	fsquare(s->xxxprime, s->xxprime);
+	/* |s->xxxprime[i]| < 2^26 */
+	fsquare(s->zzzprime, s->zzprime);
+	/* |s->zzzprime[i]| < 2^26 */
+	fproduct(s->zzprime, s->zzzprime, qmqp);
+	/* |s->zzprime[i]| < 14*2^52 */
+	freduce_degree(s->zzprime);
+	freduce_coefficients(s->zzprime);
+	/* |s->zzprime[i]| < 2^26 */
+	memcpy(x3, s->xxxprime, sizeof(limb) * 10);
+	memcpy(z3, s->zzprime, sizeof(limb) * 10);
+
+	fsquare(s->xx, x);
+	/* |s->xx[i]| < 2^26 */
+	fsquare(s->zz, z);
+	/* |s->zz[i]| < 2^26 */
+	fproduct(x2, s->xx, s->zz);
+	/* |x2[i]| < 14*2^52 */
+	freduce_degree(x2);
+	freduce_coefficients(x2);
+	/* |x2[i]| < 2^26 */
+	fdifference(s->zz, s->xx);  // does s->zz = s->xx - s->zz
+	/* |s->zz[i]| < 2^27 */
+	memset(s->zzz + 10, 0, sizeof(limb) * 9);
+	fscalar_product(s->zzz, s->zz, 121665);
+	/* |s->zzz[i]| < 2^(27+17) */
+	/* No need to call freduce_degree here:
+		 fscalar_product doesn't increase the degree of its input. */
+	freduce_coefficients(s->zzz);
+	/* |s->zzz[i]| < 2^26 */
+	fsum(s->zzz, s->xx);
+	/* |s->zzz[i]| < 2^27 */
+	fproduct(z2, s->zz, s->zzz);
+	/* |z2[i]| < 14*2^(26+27) */
+	freduce_degree(z2);
+	freduce_coefficients(z2);
+	/* |z2|i| < 2^26 */
+}
+
+/* Calculates nQ where Q is the x-coordinate of a point on the curve
+ *
+ *   resultx/resultz: the x coordinate of the resulting curve point (short form)
+ *   n: a little endian, 32-byte number
+ *   q: a point of the curve (short form)
+ */
+static void cmult(struct other_stack *s, limb *resultx, limb *resultz, const u8 *n, const limb *q)
+{
+	unsigned int i, j;
+	limb *nqpqx = s->a, *nqpqz = s->b, *nqx = s->c, *nqz = s->d, *t;
+	limb *nqpqx2 = s->e, *nqpqz2 = s->f, *nqx2 = s->g, *nqz2 = s->h;
+
+	*nqpqz = *nqx = *nqpqz2 = *nqz2 = 1;
+	memcpy(nqpqx, q, sizeof(limb) * 10);
+
+	for (i = 0; i < 32; ++i) {
+		u8 byte = n[31 - i];
+
+		for (j = 0; j < 8; ++j) {
+			const limb bit = byte >> 7;
+
+			swap_conditional(nqx, nqpqx, bit);
+			swap_conditional(nqz, nqpqz, bit);
+			fmonty(s,
+			       nqx2, nqz2,
+			       nqpqx2, nqpqz2,
+			       nqx, nqz,
+			       nqpqx, nqpqz,
+			       q);
+			swap_conditional(nqx2, nqpqx2, bit);
+			swap_conditional(nqz2, nqpqz2, bit);
+
+			t = nqx;
+			nqx = nqx2;
+			nqx2 = t;
+			t = nqz;
+			nqz = nqz2;
+			nqz2 = t;
+			t = nqpqx;
+			nqpqx = nqpqx2;
+			nqpqx2 = t;
+			t = nqpqz;
+			nqpqz = nqpqz2;
+			nqpqz2 = t;
+
+			byte <<= 1;
+		}
+	}
+
+	memcpy(resultx, nqx, sizeof(limb) * 10);
+	memcpy(resultz, nqz, sizeof(limb) * 10);
+}
+
+static bool curve25519_donna(u8 mypublic[CURVE25519_POINT_SIZE], const u8 secret[CURVE25519_POINT_SIZE], const u8 basepoint[CURVE25519_POINT_SIZE])
+{
+	struct other_stack *s = kzalloc(sizeof(struct other_stack), GFP_KERNEL);
+
+	if (unlikely(!s))
+		return false;
+
+	memcpy(s->ee, secret, 32);
+	normalize_secret(s->ee);
+
+	fexpand(s->bp, basepoint);
+	cmult(s, s->x, s->z, s->ee, s->bp);
+	crecip(s->zmone, s->z);
+	fmul(s->z, s->x, s->zmone);
+	fcontract(mypublic, s->z);
+
+	kzfree(s);
+	return true;
+}
+#endif
+#endif
+
+static const u8 null_point[CURVE25519_POINT_SIZE] = { 0 };
+
+bool curve25519(u8 mypublic[CURVE25519_POINT_SIZE], const u8 secret[CURVE25519_POINT_SIZE], const u8 basepoint[CURVE25519_POINT_SIZE])
+{
+	bool ret = true;
+#if defined(CONFIG_X86_64) && defined(CONFIG_AS_AVX)
+	if (curve25519_use_avx && irq_fpu_usable()) {
+		kernel_fpu_begin();
+		curve25519_sandy2x(mypublic, secret, basepoint);
+		kernel_fpu_end();
+	} else
+#elif IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && defined(CONFIG_ARM)
+	if (curve25519_use_neon && may_use_simd()) {
+		kernel_neon_begin();
+		curve25519_neon(mypublic, secret, basepoint);
+		kernel_neon_end();
+	} else
+#endif
+		ret = curve25519_donna(mypublic, secret, basepoint);
+
+	if (!ret) /* OOM or the like; not the result of a cryptographic operation or string comparison. */
+		return ret;
+
+	return crypto_memneq(mypublic, null_point, CURVE25519_POINT_SIZE);
+}
+
+bool curve25519_generate_public(u8 pub[CURVE25519_POINT_SIZE], const u8 secret[CURVE25519_POINT_SIZE])
+{
+	static const u8 basepoint[CURVE25519_POINT_SIZE] __aligned(32) = { 9 };
+
+	if (unlikely(!crypto_memneq(secret, null_point, CURVE25519_POINT_SIZE)))
+		return false;
+
+#if defined(CONFIG_X86_64) && defined(CONFIG_AS_AVX)
+	if (curve25519_use_avx && irq_fpu_usable()) {
+		kernel_fpu_begin();
+		curve25519_sandy2x_base(pub, secret);
+		kernel_fpu_end();
+		return crypto_memneq(pub, null_point, CURVE25519_POINT_SIZE);
+	}
+#endif
+
+	return curve25519(pub, secret, basepoint);
+}
+
+void curve25519_generate_secret(u8 secret[CURVE25519_POINT_SIZE])
+{
+	get_random_bytes_wait(secret, CURVE25519_POINT_SIZE);
+	normalize_secret(secret);
+}
+
+#include "../selftest/curve25519.h"
diff --git b/net/wireguard/crypto/curve25519.h b/net/wireguard/crypto/curve25519.h
new file mode 100644
index 0000000..40f50ae
--- /dev/null
+++ b/net/wireguard/crypto/curve25519.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#ifndef _WG_CURVE25519_H
+#define _WG_CURVE25519_H
+
+#include <linux/types.h>
+
+enum curve25519_lengths {
+	CURVE25519_POINT_SIZE = 32
+};
+
+bool __must_check curve25519(u8 mypublic[CURVE25519_POINT_SIZE], const u8 secret[CURVE25519_POINT_SIZE], const u8 basepoint[CURVE25519_POINT_SIZE]);
+void curve25519_generate_secret(u8 secret[CURVE25519_POINT_SIZE]);
+bool __must_check curve25519_generate_public(u8 pub[CURVE25519_POINT_SIZE], const u8 secret[CURVE25519_POINT_SIZE]);
+
+void curve25519_fpu_init(void);
+
+#ifdef DEBUG
+bool curve25519_selftest(void);
+#endif
+
+#endif /* _WG_CURVE25519_H */
diff --git b/net/wireguard/crypto/poly1305-arm.S b/net/wireguard/crypto/poly1305-arm.S
new file mode 100644
index 0000000..75e7cab
--- /dev/null
+++ b/net/wireguard/crypto/poly1305-arm.S
@@ -0,0 +1,1115 @@
+/* SPDX-License-Identifier: OpenSSL OR (BSD-3-Clause OR GPL-2.0)
+ *
+ * Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ * Copyright 2016 The OpenSSL Project Authors. All Rights Reserved.
+ */
+
+#include <linux/linkage.h>
+
+.text
+#if defined(__thumb2__)
+.syntax	unified
+.thumb
+#else
+.code	32
+#endif
+
+.align	5
+ENTRY(poly1305_init_arm)
+	stmdb	sp!,{r4-r11}
+
+	eor	r3,r3,r3
+	cmp	r1,#0
+	str	r3,[r0,#0]		@ zero hash value
+	str	r3,[r0,#4]
+	str	r3,[r0,#8]
+	str	r3,[r0,#12]
+	str	r3,[r0,#16]
+	str	r3,[r0,#36]		@ is_base2_26
+	add	r0,r0,#20
+
+#ifdef	__thumb2__
+	it	eq
+#endif
+	moveq	r0,#0
+	beq	.Lno_key
+
+	ldrb	r4,[r1,#0]
+	mov	r10,#0x0fffffff
+	ldrb	r5,[r1,#1]
+	and	r3,r10,#-4		@ 0x0ffffffc
+	ldrb	r6,[r1,#2]
+	ldrb	r7,[r1,#3]
+	orr	r4,r4,r5,lsl#8
+	ldrb	r5,[r1,#4]
+	orr	r4,r4,r6,lsl#16
+	ldrb	r6,[r1,#5]
+	orr	r4,r4,r7,lsl#24
+	ldrb	r7,[r1,#6]
+	and	r4,r4,r10
+
+	ldrb	r8,[r1,#7]
+	orr	r5,r5,r6,lsl#8
+	ldrb	r6,[r1,#8]
+	orr	r5,r5,r7,lsl#16
+	ldrb	r7,[r1,#9]
+	orr	r5,r5,r8,lsl#24
+	ldrb	r8,[r1,#10]
+	and	r5,r5,r3
+
+	ldrb	r9,[r1,#11]
+	orr	r6,r6,r7,lsl#8
+	ldrb	r7,[r1,#12]
+	orr	r6,r6,r8,lsl#16
+	ldrb	r8,[r1,#13]
+	orr	r6,r6,r9,lsl#24
+	ldrb	r9,[r1,#14]
+	and	r6,r6,r3
+
+	ldrb	r10,[r1,#15]
+	orr	r7,r7,r8,lsl#8
+	str	r4,[r0,#0]
+	orr	r7,r7,r9,lsl#16
+	str	r5,[r0,#4]
+	orr	r7,r7,r10,lsl#24
+	str	r6,[r0,#8]
+	and	r7,r7,r3
+	str	r7,[r0,#12]
+.Lno_key:
+	ldmia	sp!,{r4-r11}
+#if __LINUX_ARM_ARCH__ >= 5
+	bx	lr				@ bx	lr
+#else
+	tst	lr,#1
+	moveq	pc,lr			@ be binary compatible with V4, yet
+	.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
+#endif
+ENDPROC(poly1305_init_arm)
+
+.align	5
+ENTRY(poly1305_blocks_arm)
+.Lpoly1305_blocks_arm:
+	stmdb	sp!,{r3-r11,lr}
+
+	ands	r2,r2,#-16
+	beq	.Lno_data
+
+	cmp	r3,#0
+	add	r2,r2,r1		@ end pointer
+	sub	sp,sp,#32
+
+	ldmia	r0,{r4-r12}		@ load context
+
+	str	r0,[sp,#12]		@ offload stuff
+	mov	lr,r1
+	str	r2,[sp,#16]
+	str	r10,[sp,#20]
+	str	r11,[sp,#24]
+	str	r12,[sp,#28]
+	b	.Loop
+
+.Loop:
+#if __LINUX_ARM_ARCH__ < 7
+	ldrb	r0,[lr],#16		@ load input
+#ifdef	__thumb2__
+	it	hi
+#endif
+	addhi	r8,r8,#1		@ 1<<128
+	ldrb	r1,[lr,#-15]
+	ldrb	r2,[lr,#-14]
+	ldrb	r3,[lr,#-13]
+	orr	r1,r0,r1,lsl#8
+	ldrb	r0,[lr,#-12]
+	orr	r2,r1,r2,lsl#16
+	ldrb	r1,[lr,#-11]
+	orr	r3,r2,r3,lsl#24
+	ldrb	r2,[lr,#-10]
+	adds	r4,r4,r3		@ accumulate input
+
+	ldrb	r3,[lr,#-9]
+	orr	r1,r0,r1,lsl#8
+	ldrb	r0,[lr,#-8]
+	orr	r2,r1,r2,lsl#16
+	ldrb	r1,[lr,#-7]
+	orr	r3,r2,r3,lsl#24
+	ldrb	r2,[lr,#-6]
+	adcs	r5,r5,r3
+
+	ldrb	r3,[lr,#-5]
+	orr	r1,r0,r1,lsl#8
+	ldrb	r0,[lr,#-4]
+	orr	r2,r1,r2,lsl#16
+	ldrb	r1,[lr,#-3]
+	orr	r3,r2,r3,lsl#24
+	ldrb	r2,[lr,#-2]
+	adcs	r6,r6,r3
+
+	ldrb	r3,[lr,#-1]
+	orr	r1,r0,r1,lsl#8
+	str	lr,[sp,#8]		@ offload input pointer
+	orr	r2,r1,r2,lsl#16
+	add	r10,r10,r10,lsr#2
+	orr	r3,r2,r3,lsl#24
+#else
+	ldr	r0,[lr],#16		@ load input
+#ifdef	__thumb2__
+	it	hi
+#endif
+	addhi	r8,r8,#1		@ padbit
+	ldr	r1,[lr,#-12]
+	ldr	r2,[lr,#-8]
+	ldr	r3,[lr,#-4]
+#ifdef	__ARMEB__
+	rev	r0,r0
+	rev	r1,r1
+	rev	r2,r2
+	rev	r3,r3
+#endif
+	adds	r4,r4,r0		@ accumulate input
+	str	lr,[sp,#8]		@ offload input pointer
+	adcs	r5,r5,r1
+	add	r10,r10,r10,lsr#2
+	adcs	r6,r6,r2
+#endif
+	add	r11,r11,r11,lsr#2
+	adcs	r7,r7,r3
+	add	r12,r12,r12,lsr#2
+
+	umull	r2,r3,r5,r9
+	 adc	r8,r8,#0
+	umull	r0,r1,r4,r9
+	umlal	r2,r3,r8,r10
+	umlal	r0,r1,r7,r10
+	ldr	r10,[sp,#20]		@ reload r10
+	umlal	r2,r3,r6,r12
+	umlal	r0,r1,r5,r12
+	umlal	r2,r3,r7,r11
+	umlal	r0,r1,r6,r11
+	umlal	r2,r3,r4,r10
+	str	r0,[sp,#0]		@ future r4
+	 mul	r0,r11,r8
+	ldr	r11,[sp,#24]		@ reload r11
+	adds	r2,r2,r1		@ d1+=d0>>32
+	 eor	r1,r1,r1
+	adc	lr,r3,#0		@ future r6
+	str	r2,[sp,#4]		@ future r5
+
+	mul	r2,r12,r8
+	eor	r3,r3,r3
+	umlal	r0,r1,r7,r12
+	ldr	r12,[sp,#28]		@ reload r12
+	umlal	r2,r3,r7,r9
+	umlal	r0,r1,r6,r9
+	umlal	r2,r3,r6,r10
+	umlal	r0,r1,r5,r10
+	umlal	r2,r3,r5,r11
+	umlal	r0,r1,r4,r11
+	umlal	r2,r3,r4,r12
+	ldr	r4,[sp,#0]
+	mul	r8,r9,r8
+	ldr	r5,[sp,#4]
+
+	adds	r6,lr,r0		@ d2+=d1>>32
+	ldr	lr,[sp,#8]		@ reload input pointer
+	adc	r1,r1,#0
+	adds	r7,r2,r1		@ d3+=d2>>32
+	ldr	r0,[sp,#16]		@ reload end pointer
+	adc	r3,r3,#0
+	add	r8,r8,r3		@ h4+=d3>>32
+
+	and	r1,r8,#-4
+	and	r8,r8,#3
+	add	r1,r1,r1,lsr#2		@ *=5
+	adds	r4,r4,r1
+	adcs	r5,r5,#0
+	adcs	r6,r6,#0
+	adcs	r7,r7,#0
+	adc	r8,r8,#0
+
+	cmp	r0,lr			@ done yet?
+	bhi	.Loop
+
+	ldr	r0,[sp,#12]
+	add	sp,sp,#32
+	stmia	r0,{r4-r8}		@ store the result
+
+.Lno_data:
+#if __LINUX_ARM_ARCH__ >= 5
+	ldmia	sp!,{r3-r11,pc}
+#else
+	ldmia	sp!,{r3-r11,lr}
+	tst	lr,#1
+	moveq	pc,lr			@ be binary compatible with V4, yet
+	.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
+#endif
+ENDPROC(poly1305_blocks_arm)
+
+.align	5
+ENTRY(poly1305_emit_arm)
+	stmdb	sp!,{r4-r11}
+.Lpoly1305_emit_enter:
+	ldmia	r0,{r3-r7}
+	adds	r8,r3,#5		@ compare to modulus
+	adcs	r9,r4,#0
+	adcs	r10,r5,#0
+	adcs	r11,r6,#0
+	adc	r7,r7,#0
+	tst	r7,#4			@ did it carry/borrow?
+
+#ifdef	__thumb2__
+	it	ne
+#endif
+	movne	r3,r8
+	ldr	r8,[r2,#0]
+#ifdef	__thumb2__
+	it	ne
+#endif
+	movne	r4,r9
+	ldr	r9,[r2,#4]
+#ifdef	__thumb2__
+	it	ne
+#endif
+	movne	r5,r10
+	ldr	r10,[r2,#8]
+#ifdef	__thumb2__
+	it	ne
+#endif
+	movne	r6,r11
+	ldr	r11,[r2,#12]
+
+	adds	r3,r3,r8
+	adcs	r4,r4,r9
+	adcs	r5,r5,r10
+	adc	r6,r6,r11
+
+#if __LINUX_ARM_ARCH__ >= 7
+#ifdef __ARMEB__
+	rev	r3,r3
+	rev	r4,r4
+	rev	r5,r5
+	rev	r6,r6
+#endif
+	str	r3,[r1,#0]
+	str	r4,[r1,#4]
+	str	r5,[r1,#8]
+	str	r6,[r1,#12]
+#else
+	strb	r3,[r1,#0]
+	mov	r3,r3,lsr#8
+	strb	r4,[r1,#4]
+	mov	r4,r4,lsr#8
+	strb	r5,[r1,#8]
+	mov	r5,r5,lsr#8
+	strb	r6,[r1,#12]
+	mov	r6,r6,lsr#8
+
+	strb	r3,[r1,#1]
+	mov	r3,r3,lsr#8
+	strb	r4,[r1,#5]
+	mov	r4,r4,lsr#8
+	strb	r5,[r1,#9]
+	mov	r5,r5,lsr#8
+	strb	r6,[r1,#13]
+	mov	r6,r6,lsr#8
+
+	strb	r3,[r1,#2]
+	mov	r3,r3,lsr#8
+	strb	r4,[r1,#6]
+	mov	r4,r4,lsr#8
+	strb	r5,[r1,#10]
+	mov	r5,r5,lsr#8
+	strb	r6,[r1,#14]
+	mov	r6,r6,lsr#8
+
+	strb	r3,[r1,#3]
+	strb	r4,[r1,#7]
+	strb	r5,[r1,#11]
+	strb	r6,[r1,#15]
+#endif
+	ldmia	sp!,{r4-r11}
+#if __LINUX_ARM_ARCH__ >= 5
+	bx	lr				@ bx	lr
+#else
+	tst	lr,#1
+	moveq	pc,lr			@ be binary compatible with V4, yet
+	.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
+#endif
+ENDPROC(poly1305_emit_arm)
+
+
+#if __LINUX_ARM_ARCH__ >= 7
+.fpu	neon
+
+.align	5
+ENTRY(poly1305_init_neon)
+.Lpoly1305_init_neon:
+	ldr	r4,[r0,#20]		@ load key base 2^32
+	ldr	r5,[r0,#24]
+	ldr	r6,[r0,#28]
+	ldr	r7,[r0,#32]
+
+	and	r2,r4,#0x03ffffff	@ base 2^32 -> base 2^26
+	mov	r3,r4,lsr#26
+	mov	r4,r5,lsr#20
+	orr	r3,r3,r5,lsl#6
+	mov	r5,r6,lsr#14
+	orr	r4,r4,r6,lsl#12
+	mov	r6,r7,lsr#8
+	orr	r5,r5,r7,lsl#18
+	and	r3,r3,#0x03ffffff
+	and	r4,r4,#0x03ffffff
+	and	r5,r5,#0x03ffffff
+
+	vdup.32	d0,r2			@ r^1 in both lanes
+	add	r2,r3,r3,lsl#2		@ *5
+	vdup.32	d1,r3
+	add	r3,r4,r4,lsl#2
+	vdup.32	d2,r2
+	vdup.32	d3,r4
+	add	r4,r5,r5,lsl#2
+	vdup.32	d4,r3
+	vdup.32	d5,r5
+	add	r5,r6,r6,lsl#2
+	vdup.32	d6,r4
+	vdup.32	d7,r6
+	vdup.32	d8,r5
+
+	mov	r5,#2		@ counter
+
+.Lsquare_neon:
+	@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+	@ d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4
+	@ d1 = h1*r0 + h0*r1   + h4*5*r2 + h3*5*r3 + h2*5*r4
+	@ d2 = h2*r0 + h1*r1   + h0*r2   + h4*5*r3 + h3*5*r4
+	@ d3 = h3*r0 + h2*r1   + h1*r2   + h0*r3   + h4*5*r4
+	@ d4 = h4*r0 + h3*r1   + h2*r2   + h1*r3   + h0*r4
+
+	vmull.u32	q5,d0,d0[1]
+	vmull.u32	q6,d1,d0[1]
+	vmull.u32	q7,d3,d0[1]
+	vmull.u32	q8,d5,d0[1]
+	vmull.u32	q9,d7,d0[1]
+
+	vmlal.u32	q5,d7,d2[1]
+	vmlal.u32	q6,d0,d1[1]
+	vmlal.u32	q7,d1,d1[1]
+	vmlal.u32	q8,d3,d1[1]
+	vmlal.u32	q9,d5,d1[1]
+
+	vmlal.u32	q5,d5,d4[1]
+	vmlal.u32	q6,d7,d4[1]
+	vmlal.u32	q8,d1,d3[1]
+	vmlal.u32	q7,d0,d3[1]
+	vmlal.u32	q9,d3,d3[1]
+
+	vmlal.u32	q5,d3,d6[1]
+	vmlal.u32	q8,d0,d5[1]
+	vmlal.u32	q6,d5,d6[1]
+	vmlal.u32	q7,d7,d6[1]
+	vmlal.u32	q9,d1,d5[1]
+
+	vmlal.u32	q8,d7,d8[1]
+	vmlal.u32	q5,d1,d8[1]
+	vmlal.u32	q6,d3,d8[1]
+	vmlal.u32	q7,d5,d8[1]
+	vmlal.u32	q9,d0,d7[1]
+
+	@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+	@ lazy reduction as discussed in "NEON crypto" by D.J. Bernstein
+	@ and P. Schwabe
+	@
+	@ H0>>+H1>>+H2>>+H3>>+H4
+	@ H3>>+H4>>*5+H0>>+H1
+	@
+	@ Trivia.
+	@
+	@ Result of multiplication of n-bit number by m-bit number is
+	@ n+m bits wide. However! Even though 2^n is a n+1-bit number,
+	@ m-bit number multiplied by 2^n is still n+m bits wide.
+	@
+	@ Sum of two n-bit numbers is n+1 bits wide, sum of three - n+2,
+	@ and so is sum of four. Sum of 2^m n-m-bit numbers and n-bit
+	@ one is n+1 bits wide.
+	@
+	@ >>+ denotes Hnext += Hn>>26, Hn &= 0x3ffffff. This means that
+	@ H0, H2, H3 are guaranteed to be 26 bits wide, while H1 and H4
+	@ can be 27. However! In cases when their width exceeds 26 bits
+	@ they are limited by 2^26+2^6. This in turn means that *sum*
+	@ of the products with these values can still be viewed as sum
+	@ of 52-bit numbers as long as the amount of addends is not a
+	@ power of 2. For example,
+	@
+	@ H4 = H4*R0 + H3*R1 + H2*R2 + H1*R3 + H0 * R4,
+	@
+	@ which can't be larger than 5 * (2^26 + 2^6) * (2^26 + 2^6), or
+	@ 5 * (2^52 + 2*2^32 + 2^12), which in turn is smaller than
+	@ 8 * (2^52) or 2^55. However, the value is then multiplied by
+	@ by 5, so we should be looking at 5 * 5 * (2^52 + 2^33 + 2^12),
+	@ which is less than 32 * (2^52) or 2^57. And when processing
+	@ data we are looking at triple as many addends...
+	@
+	@ In key setup procedure pre-reduced H0 is limited by 5*4+1 and
+	@ 5*H4 - by 5*5 52-bit addends, or 57 bits. But when hashing the
+	@ input H0 is limited by (5*4+1)*3 addends, or 58 bits, while
+	@ 5*H4 by 5*5*3, or 59[!] bits. How is this relevant? vmlal.u32
+	@ instruction accepts 2x32-bit input and writes 2x64-bit result.
+	@ This means that result of reduction have to be compressed upon
+	@ loop wrap-around. This can be done in the process of reduction
+	@ to minimize amount of instructions [as well as amount of
+	@ 128-bit instructions, which benefits low-end processors], but
+	@ one has to watch for H2 (which is narrower than H0) and 5*H4
+	@ not being wider than 58 bits, so that result of right shift
+	@ by 26 bits fits in 32 bits. This is also useful on x86,
+	@ because it allows to use paddd in place for paddq, which
+	@ benefits Atom, where paddq is ridiculously slow.
+
+	vshr.u64	q15,q8,#26
+	vmovn.i64	d16,q8
+	 vshr.u64	q4,q5,#26
+	 vmovn.i64	d10,q5
+	vadd.i64	q9,q9,q15		@ h3 -> h4
+	vbic.i32	d16,#0xfc000000	@ &=0x03ffffff
+	 vadd.i64	q6,q6,q4		@ h0 -> h1
+	 vbic.i32	d10,#0xfc000000
+
+	vshrn.u64	d30,q9,#26
+	vmovn.i64	d18,q9
+	 vshr.u64	q4,q6,#26
+	 vmovn.i64	d12,q6
+	 vadd.i64	q7,q7,q4		@ h1 -> h2
+	vbic.i32	d18,#0xfc000000
+	 vbic.i32	d12,#0xfc000000
+
+	vadd.i32	d10,d10,d30
+	vshl.u32	d30,d30,#2
+	 vshrn.u64	d8,q7,#26
+	 vmovn.i64	d14,q7
+	vadd.i32	d10,d10,d30	@ h4 -> h0
+	 vadd.i32	d16,d16,d8	@ h2 -> h3
+	 vbic.i32	d14,#0xfc000000
+
+	vshr.u32	d30,d10,#26
+	vbic.i32	d10,#0xfc000000
+	 vshr.u32	d8,d16,#26
+	 vbic.i32	d16,#0xfc000000
+	vadd.i32	d12,d12,d30	@ h0 -> h1
+	 vadd.i32	d18,d18,d8	@ h3 -> h4
+
+	subs		r5,r5,#1
+	beq		.Lsquare_break_neon
+
+	add		r6,r0,#(48+0*9*4)
+	add		r7,r0,#(48+1*9*4)
+
+	vtrn.32		d0,d10		@ r^2:r^1
+	vtrn.32		d3,d14
+	vtrn.32		d5,d16
+	vtrn.32		d1,d12
+	vtrn.32		d7,d18
+
+	vshl.u32	d4,d3,#2		@ *5
+	vshl.u32	d6,d5,#2
+	vshl.u32	d2,d1,#2
+	vshl.u32	d8,d7,#2
+	vadd.i32	d4,d4,d3
+	vadd.i32	d2,d2,d1
+	vadd.i32	d6,d6,d5
+	vadd.i32	d8,d8,d7
+
+	vst4.32		{d0[0],d1[0],d2[0],d3[0]},[r6]!
+	vst4.32		{d0[1],d1[1],d2[1],d3[1]},[r7]!
+	vst4.32		{d4[0],d5[0],d6[0],d7[0]},[r6]!
+	vst4.32		{d4[1],d5[1],d6[1],d7[1]},[r7]!
+	vst1.32		{d8[0]},[r6,:32]
+	vst1.32		{d8[1]},[r7,:32]
+
+	b		.Lsquare_neon
+
+.align	4
+.Lsquare_break_neon:
+	add		r6,r0,#(48+2*4*9)
+	add		r7,r0,#(48+3*4*9)
+
+	vmov		d0,d10		@ r^4:r^3
+	vshl.u32	d2,d12,#2		@ *5
+	vmov		d1,d12
+	vshl.u32	d4,d14,#2
+	vmov		d3,d14
+	vshl.u32	d6,d16,#2
+	vmov		d5,d16
+	vshl.u32	d8,d18,#2
+	vmov		d7,d18
+	vadd.i32	d2,d2,d12
+	vadd.i32	d4,d4,d14
+	vadd.i32	d6,d6,d16
+	vadd.i32	d8,d8,d18
+
+	vst4.32		{d0[0],d1[0],d2[0],d3[0]},[r6]!
+	vst4.32		{d0[1],d1[1],d2[1],d3[1]},[r7]!
+	vst4.32		{d4[0],d5[0],d6[0],d7[0]},[r6]!
+	vst4.32		{d4[1],d5[1],d6[1],d7[1]},[r7]!
+	vst1.32		{d8[0]},[r6]
+	vst1.32		{d8[1]},[r7]
+
+	bx	lr				@ bx	lr
+ENDPROC(poly1305_init_neon)
+
+.align	5
+ENTRY(poly1305_blocks_neon)
+	ldr	ip,[r0,#36]		@ is_base2_26
+	ands	r2,r2,#-16
+	beq	.Lno_data_neon
+
+	cmp	r2,#64
+	bhs	.Lenter_neon
+	tst	ip,ip			@ is_base2_26?
+	beq	.Lpoly1305_blocks_arm
+
+.Lenter_neon:
+	stmdb	sp!,{r4-r7}
+	vstmdb	sp!,{d8-d15}		@ ABI specification says so
+
+	tst	ip,ip			@ is_base2_26?
+	bne	.Lbase2_26_neon
+
+	stmdb	sp!,{r1-r3,lr}
+	bl	.Lpoly1305_init_neon
+
+	ldr	r4,[r0,#0]		@ load hash value base 2^32
+	ldr	r5,[r0,#4]
+	ldr	r6,[r0,#8]
+	ldr	r7,[r0,#12]
+	ldr	ip,[r0,#16]
+
+	and	r2,r4,#0x03ffffff	@ base 2^32 -> base 2^26
+	mov	r3,r4,lsr#26
+	 veor	d10,d10,d10
+	mov	r4,r5,lsr#20
+	orr	r3,r3,r5,lsl#6
+	 veor	d12,d12,d12
+	mov	r5,r6,lsr#14
+	orr	r4,r4,r6,lsl#12
+	 veor	d14,d14,d14
+	mov	r6,r7,lsr#8
+	orr	r5,r5,r7,lsl#18
+	 veor	d16,d16,d16
+	and	r3,r3,#0x03ffffff
+	orr	r6,r6,ip,lsl#24
+	 veor	d18,d18,d18
+	and	r4,r4,#0x03ffffff
+	mov	r1,#1
+	and	r5,r5,#0x03ffffff
+	str	r1,[r0,#36]		@ is_base2_26
+
+	vmov.32	d10[0],r2
+	vmov.32	d12[0],r3
+	vmov.32	d14[0],r4
+	vmov.32	d16[0],r5
+	vmov.32	d18[0],r6
+	adr	r5,.Lzeros
+
+	ldmia	sp!,{r1-r3,lr}
+	b	.Lbase2_32_neon
+
+.align	4
+.Lbase2_26_neon:
+	@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+	@ load hash value
+
+	veor		d10,d10,d10
+	veor		d12,d12,d12
+	veor		d14,d14,d14
+	veor		d16,d16,d16
+	veor		d18,d18,d18
+	vld4.32		{d10[0],d12[0],d14[0],d16[0]},[r0]!
+	adr		r5,.Lzeros
+	vld1.32		{d18[0]},[r0]
+	sub		r0,r0,#16		@ rewind
+
+.Lbase2_32_neon:
+	add		r4,r1,#32
+	mov		r3,r3,lsl#24
+	tst		r2,#31
+	beq		.Leven
+
+	vld4.32		{d20[0],d22[0],d24[0],d26[0]},[r1]!
+	vmov.32		d28[0],r3
+	sub		r2,r2,#16
+	add		r4,r1,#32
+
+#ifdef	__ARMEB__
+	vrev32.8	q10,q10
+	vrev32.8	q13,q13
+	vrev32.8	q11,q11
+	vrev32.8	q12,q12
+#endif
+	vsri.u32	d28,d26,#8	@ base 2^32 -> base 2^26
+	vshl.u32	d26,d26,#18
+
+	vsri.u32	d26,d24,#14
+	vshl.u32	d24,d24,#12
+	vadd.i32	d29,d28,d18	@ add hash value and move to #hi
+
+	vbic.i32	d26,#0xfc000000
+	vsri.u32	d24,d22,#20
+	vshl.u32	d22,d22,#6
+
+	vbic.i32	d24,#0xfc000000
+	vsri.u32	d22,d20,#26
+	vadd.i32	d27,d26,d16
+
+	vbic.i32	d20,#0xfc000000
+	vbic.i32	d22,#0xfc000000
+	vadd.i32	d25,d24,d14
+
+	vadd.i32	d21,d20,d10
+	vadd.i32	d23,d22,d12
+
+	mov		r7,r5
+	add		r6,r0,#48
+
+	cmp		r2,r2
+	b		.Long_tail
+
+.align	4
+.Leven:
+	subs		r2,r2,#64
+	it		lo
+	movlo		r4,r5
+
+	vmov.i32	q14,#1<<24		@ padbit, yes, always
+	vld4.32		{d20,d22,d24,d26},[r1]	@ inp[0:1]
+	add		r1,r1,#64
+	vld4.32		{d21,d23,d25,d27},[r4]	@ inp[2:3] (or 0)
+	add		r4,r4,#64
+	itt		hi
+	addhi		r7,r0,#(48+1*9*4)
+	addhi		r6,r0,#(48+3*9*4)
+
+#ifdef	__ARMEB__
+	vrev32.8	q10,q10
+	vrev32.8	q13,q13
+	vrev32.8	q11,q11
+	vrev32.8	q12,q12
+#endif
+	vsri.u32	q14,q13,#8		@ base 2^32 -> base 2^26
+	vshl.u32	q13,q13,#18
+
+	vsri.u32	q13,q12,#14
+	vshl.u32	q12,q12,#12
+
+	vbic.i32	q13,#0xfc000000
+	vsri.u32	q12,q11,#20
+	vshl.u32	q11,q11,#6
+
+	vbic.i32	q12,#0xfc000000
+	vsri.u32	q11,q10,#26
+
+	vbic.i32	q10,#0xfc000000
+	vbic.i32	q11,#0xfc000000
+
+	bls		.Lskip_loop
+
+	vld4.32		{d0[1],d1[1],d2[1],d3[1]},[r7]!	@ load r^2
+	vld4.32		{d0[0],d1[0],d2[0],d3[0]},[r6]!	@ load r^4
+	vld4.32		{d4[1],d5[1],d6[1],d7[1]},[r7]!
+	vld4.32		{d4[0],d5[0],d6[0],d7[0]},[r6]!
+	b		.Loop_neon
+
+.align	5
+.Loop_neon:
+	@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+	@ ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2
+	@ ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^3+inp[7]*r
+	@   ___________________/
+	@ ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2+inp[8])*r^2
+	@ ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^4+inp[7]*r^2+inp[9])*r
+	@   ___________________/ ____________________/
+	@
+	@ Note that we start with inp[2:3]*r^2. This is because it
+	@ doesn't depend on reduction in previous iteration.
+	@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+	@ d4 = h4*r0 + h3*r1   + h2*r2   + h1*r3   + h0*r4
+	@ d3 = h3*r0 + h2*r1   + h1*r2   + h0*r3   + h4*5*r4
+	@ d2 = h2*r0 + h1*r1   + h0*r2   + h4*5*r3 + h3*5*r4
+	@ d1 = h1*r0 + h0*r1   + h4*5*r2 + h3*5*r3 + h2*5*r4
+	@ d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4
+
+	@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+	@ inp[2:3]*r^2
+
+	vadd.i32	d24,d24,d14	@ accumulate inp[0:1]
+	vmull.u32	q7,d25,d0[1]
+	vadd.i32	d20,d20,d10
+	vmull.u32	q5,d21,d0[1]
+	vadd.i32	d26,d26,d16
+	vmull.u32	q8,d27,d0[1]
+	vmlal.u32	q7,d23,d1[1]
+	vadd.i32	d22,d22,d12
+	vmull.u32	q6,d23,d0[1]
+
+	vadd.i32	d28,d28,d18
+	vmull.u32	q9,d29,d0[1]
+	subs		r2,r2,#64
+	vmlal.u32	q5,d29,d2[1]
+	it		lo
+	movlo		r4,r5
+	vmlal.u32	q8,d25,d1[1]
+	vld1.32		d8[1],[r7,:32]
+	vmlal.u32	q6,d21,d1[1]
+	vmlal.u32	q9,d27,d1[1]
+
+	vmlal.u32	q5,d27,d4[1]
+	vmlal.u32	q8,d23,d3[1]
+	vmlal.u32	q9,d25,d3[1]
+	vmlal.u32	q6,d29,d4[1]
+	vmlal.u32	q7,d21,d3[1]
+
+	vmlal.u32	q8,d21,d5[1]
+	vmlal.u32	q5,d25,d6[1]
+	vmlal.u32	q9,d23,d5[1]
+	vmlal.u32	q6,d27,d6[1]
+	vmlal.u32	q7,d29,d6[1]
+
+	vmlal.u32	q8,d29,d8[1]
+	vmlal.u32	q5,d23,d8[1]
+	vmlal.u32	q9,d21,d7[1]
+	vmlal.u32	q6,d25,d8[1]
+	vmlal.u32	q7,d27,d8[1]
+
+	vld4.32		{d21,d23,d25,d27},[r4]	@ inp[2:3] (or 0)
+	add		r4,r4,#64
+
+	@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+	@ (hash+inp[0:1])*r^4 and accumulate
+
+	vmlal.u32	q8,d26,d0[0]
+	vmlal.u32	q5,d20,d0[0]
+	vmlal.u32	q9,d28,d0[0]
+	vmlal.u32	q6,d22,d0[0]
+	vmlal.u32	q7,d24,d0[0]
+	vld1.32		d8[0],[r6,:32]
+
+	vmlal.u32	q8,d24,d1[0]
+	vmlal.u32	q5,d28,d2[0]
+	vmlal.u32	q9,d26,d1[0]
+	vmlal.u32	q6,d20,d1[0]
+	vmlal.u32	q7,d22,d1[0]
+
+	vmlal.u32	q8,d22,d3[0]
+	vmlal.u32	q5,d26,d4[0]
+	vmlal.u32	q9,d24,d3[0]
+	vmlal.u32	q6,d28,d4[0]
+	vmlal.u32	q7,d20,d3[0]
+
+	vmlal.u32	q8,d20,d5[0]
+	vmlal.u32	q5,d24,d6[0]
+	vmlal.u32	q9,d22,d5[0]
+	vmlal.u32	q6,d26,d6[0]
+	vmlal.u32	q8,d28,d8[0]
+
+	vmlal.u32	q7,d28,d6[0]
+	vmlal.u32	q5,d22,d8[0]
+	vmlal.u32	q9,d20,d7[0]
+	vmov.i32	q14,#1<<24		@ padbit, yes, always
+	vmlal.u32	q6,d24,d8[0]
+	vmlal.u32	q7,d26,d8[0]
+
+	vld4.32		{d20,d22,d24,d26},[r1]	@ inp[0:1]
+	add		r1,r1,#64
+#ifdef	__ARMEB__
+	vrev32.8	q10,q10
+	vrev32.8	q11,q11
+	vrev32.8	q12,q12
+	vrev32.8	q13,q13
+#endif
+
+	@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+	@ lazy reduction interleaved with base 2^32 -> base 2^26 of
+	@ inp[0:3] previously loaded to q10-q13 and smashed to q10-q14.
+
+	vshr.u64	q15,q8,#26
+	vmovn.i64	d16,q8
+	 vshr.u64	q4,q5,#26
+	 vmovn.i64	d10,q5
+	vadd.i64	q9,q9,q15		@ h3 -> h4
+	vbic.i32	d16,#0xfc000000
+	  vsri.u32	q14,q13,#8		@ base 2^32 -> base 2^26
+	 vadd.i64	q6,q6,q4		@ h0 -> h1
+	  vshl.u32	q13,q13,#18
+	 vbic.i32	d10,#0xfc000000
+
+	vshrn.u64	d30,q9,#26
+	vmovn.i64	d18,q9
+	 vshr.u64	q4,q6,#26
+	 vmovn.i64	d12,q6
+	 vadd.i64	q7,q7,q4		@ h1 -> h2
+	  vsri.u32	q13,q12,#14
+	vbic.i32	d18,#0xfc000000
+	  vshl.u32	q12,q12,#12
+	 vbic.i32	d12,#0xfc000000
+
+	vadd.i32	d10,d10,d30
+	vshl.u32	d30,d30,#2
+	  vbic.i32	q13,#0xfc000000
+	 vshrn.u64	d8,q7,#26
+	 vmovn.i64	d14,q7
+	vaddl.u32	q5,d10,d30	@ h4 -> h0 [widen for a sec]
+	  vsri.u32	q12,q11,#20
+	 vadd.i32	d16,d16,d8	@ h2 -> h3
+	  vshl.u32	q11,q11,#6
+	 vbic.i32	d14,#0xfc000000
+	  vbic.i32	q12,#0xfc000000
+
+	vshrn.u64	d30,q5,#26		@ re-narrow
+	vmovn.i64	d10,q5
+	  vsri.u32	q11,q10,#26
+	  vbic.i32	q10,#0xfc000000
+	 vshr.u32	d8,d16,#26
+	 vbic.i32	d16,#0xfc000000
+	vbic.i32	d10,#0xfc000000
+	vadd.i32	d12,d12,d30	@ h0 -> h1
+	 vadd.i32	d18,d18,d8	@ h3 -> h4
+	  vbic.i32	q11,#0xfc000000
+
+	bhi		.Loop_neon
+
+.Lskip_loop:
+	@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+	@ multiply (inp[0:1]+hash) or inp[2:3] by r^2:r^1
+
+	add		r7,r0,#(48+0*9*4)
+	add		r6,r0,#(48+1*9*4)
+	adds		r2,r2,#32
+	it		ne
+	movne		r2,#0
+	bne		.Long_tail
+
+	vadd.i32	d25,d24,d14	@ add hash value and move to #hi
+	vadd.i32	d21,d20,d10
+	vadd.i32	d27,d26,d16
+	vadd.i32	d23,d22,d12
+	vadd.i32	d29,d28,d18
+
+.Long_tail:
+	vld4.32		{d0[1],d1[1],d2[1],d3[1]},[r7]!	@ load r^1
+	vld4.32		{d0[0],d1[0],d2[0],d3[0]},[r6]!	@ load r^2
+
+	vadd.i32	d24,d24,d14	@ can be redundant
+	vmull.u32	q7,d25,d0
+	vadd.i32	d20,d20,d10
+	vmull.u32	q5,d21,d0
+	vadd.i32	d26,d26,d16
+	vmull.u32	q8,d27,d0
+	vadd.i32	d22,d22,d12
+	vmull.u32	q6,d23,d0
+	vadd.i32	d28,d28,d18
+	vmull.u32	q9,d29,d0
+
+	vmlal.u32	q5,d29,d2
+	vld4.32		{d4[1],d5[1],d6[1],d7[1]},[r7]!
+	vmlal.u32	q8,d25,d1
+	vld4.32		{d4[0],d5[0],d6[0],d7[0]},[r6]!
+	vmlal.u32	q6,d21,d1
+	vmlal.u32	q9,d27,d1
+	vmlal.u32	q7,d23,d1
+
+	vmlal.u32	q8,d23,d3
+	vld1.32		d8[1],[r7,:32]
+	vmlal.u32	q5,d27,d4
+	vld1.32		d8[0],[r6,:32]
+	vmlal.u32	q9,d25,d3
+	vmlal.u32	q6,d29,d4
+	vmlal.u32	q7,d21,d3
+
+	vmlal.u32	q8,d21,d5
+	 it		ne
+	 addne		r7,r0,#(48+2*9*4)
+	vmlal.u32	q5,d25,d6
+	 it		ne
+	 addne		r6,r0,#(48+3*9*4)
+	vmlal.u32	q9,d23,d5
+	vmlal.u32	q6,d27,d6
+	vmlal.u32	q7,d29,d6
+
+	vmlal.u32	q8,d29,d8
+	 vorn		q0,q0,q0	@ all-ones, can be redundant
+	vmlal.u32	q5,d23,d8
+	 vshr.u64	q0,q0,#38
+	vmlal.u32	q9,d21,d7
+	vmlal.u32	q6,d25,d8
+	vmlal.u32	q7,d27,d8
+
+	beq		.Lshort_tail
+
+	@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+	@ (hash+inp[0:1])*r^4:r^3 and accumulate
+
+	vld4.32		{d0[1],d1[1],d2[1],d3[1]},[r7]!	@ load r^3
+	vld4.32		{d0[0],d1[0],d2[0],d3[0]},[r6]!	@ load r^4
+
+	vmlal.u32	q7,d24,d0
+	vmlal.u32	q5,d20,d0
+	vmlal.u32	q8,d26,d0
+	vmlal.u32	q6,d22,d0
+	vmlal.u32	q9,d28,d0
+
+	vmlal.u32	q5,d28,d2
+	vld4.32		{d4[1],d5[1],d6[1],d7[1]},[r7]!
+	vmlal.u32	q8,d24,d1
+	vld4.32		{d4[0],d5[0],d6[0],d7[0]},[r6]!
+	vmlal.u32	q6,d20,d1
+	vmlal.u32	q9,d26,d1
+	vmlal.u32	q7,d22,d1
+
+	vmlal.u32	q8,d22,d3
+	vld1.32		d8[1],[r7,:32]
+	vmlal.u32	q5,d26,d4
+	vld1.32		d8[0],[r6,:32]
+	vmlal.u32	q9,d24,d3
+	vmlal.u32	q6,d28,d4
+	vmlal.u32	q7,d20,d3
+
+	vmlal.u32	q8,d20,d5
+	vmlal.u32	q5,d24,d6
+	vmlal.u32	q9,d22,d5
+	vmlal.u32	q6,d26,d6
+	vmlal.u32	q7,d28,d6
+
+	vmlal.u32	q8,d28,d8
+	 vorn		q0,q0,q0	@ all-ones
+	vmlal.u32	q5,d22,d8
+	 vshr.u64	q0,q0,#38
+	vmlal.u32	q9,d20,d7
+	vmlal.u32	q6,d24,d8
+	vmlal.u32	q7,d26,d8
+
+.Lshort_tail:
+	@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+	@ horizontal addition
+
+	vadd.i64	d16,d16,d17
+	vadd.i64	d10,d10,d11
+	vadd.i64	d18,d18,d19
+	vadd.i64	d12,d12,d13
+	vadd.i64	d14,d14,d15
+
+	@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+	@ lazy reduction, but without narrowing
+
+	vshr.u64	q15,q8,#26
+	vand.i64	q8,q8,q0
+	 vshr.u64	q4,q5,#26
+	 vand.i64	q5,q5,q0
+	vadd.i64	q9,q9,q15		@ h3 -> h4
+	 vadd.i64	q6,q6,q4		@ h0 -> h1
+
+	vshr.u64	q15,q9,#26
+	vand.i64	q9,q9,q0
+	 vshr.u64	q4,q6,#26
+	 vand.i64	q6,q6,q0
+	 vadd.i64	q7,q7,q4		@ h1 -> h2
+
+	vadd.i64	q5,q5,q15
+	vshl.u64	q15,q15,#2
+	 vshr.u64	q4,q7,#26
+	 vand.i64	q7,q7,q0
+	vadd.i64	q5,q5,q15		@ h4 -> h0
+	 vadd.i64	q8,q8,q4		@ h2 -> h3
+
+	vshr.u64	q15,q5,#26
+	vand.i64	q5,q5,q0
+	 vshr.u64	q4,q8,#26
+	 vand.i64	q8,q8,q0
+	vadd.i64	q6,q6,q15		@ h0 -> h1
+	 vadd.i64	q9,q9,q4		@ h3 -> h4
+
+	cmp		r2,#0
+	bne		.Leven
+
+	@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+	@ store hash value
+
+	vst4.32		{d10[0],d12[0],d14[0],d16[0]},[r0]!
+	vst1.32		{d18[0]},[r0]
+
+	vldmia	sp!,{d8-d15}			@ epilogue
+	ldmia	sp!,{r4-r7}
+.Lno_data_neon:
+	bx	lr					@ bx	lr
+ENDPROC(poly1305_blocks_neon)
+
+.align	5
+ENTRY(poly1305_emit_neon)
+	ldr	ip,[r0,#36]		@ is_base2_26
+
+	stmdb	sp!,{r4-r11}
+
+	tst	ip,ip
+	beq	.Lpoly1305_emit_enter
+
+	ldmia	r0,{r3-r7}
+	eor	r8,r8,r8
+
+	adds	r3,r3,r4,lsl#26	@ base 2^26 -> base 2^32
+	mov	r4,r4,lsr#6
+	adcs	r4,r4,r5,lsl#20
+	mov	r5,r5,lsr#12
+	adcs	r5,r5,r6,lsl#14
+	mov	r6,r6,lsr#18
+	adcs	r6,r6,r7,lsl#8
+	adc	r7,r8,r7,lsr#24	@ can be partially reduced ...
+
+	and	r8,r7,#-4		@ ... so reduce
+	and	r7,r6,#3
+	add	r8,r8,r8,lsr#2	@ *= 5
+	adds	r3,r3,r8
+	adcs	r4,r4,#0
+	adcs	r5,r5,#0
+	adcs	r6,r6,#0
+	adc	r7,r7,#0
+
+	adds	r8,r3,#5		@ compare to modulus
+	adcs	r9,r4,#0
+	adcs	r10,r5,#0
+	adcs	r11,r6,#0
+	adc	r7,r7,#0
+	tst	r7,#4			@ did it carry/borrow?
+
+	it	ne
+	movne	r3,r8
+	ldr	r8,[r2,#0]
+	it	ne
+	movne	r4,r9
+	ldr	r9,[r2,#4]
+	it	ne
+	movne	r5,r10
+	ldr	r10,[r2,#8]
+	it	ne
+	movne	r6,r11
+	ldr	r11,[r2,#12]
+
+	adds	r3,r3,r8		@ accumulate nonce
+	adcs	r4,r4,r9
+	adcs	r5,r5,r10
+	adc	r6,r6,r11
+
+#ifdef __ARMEB__
+	rev	r3,r3
+	rev	r4,r4
+	rev	r5,r5
+	rev	r6,r6
+#endif
+	str	r3,[r1,#0]		@ store the result
+	str	r4,[r1,#4]
+	str	r5,[r1,#8]
+	str	r6,[r1,#12]
+
+	ldmia	sp!,{r4-r11}
+	bx	lr				@ bx	lr
+ENDPROC(poly1305_emit_neon)
+
+.align	5
+.Lzeros:
+.long	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+#endif
diff --git b/net/wireguard/crypto/poly1305-arm64.S b/net/wireguard/crypto/poly1305-arm64.S
new file mode 100644
index 0000000..ef54afd
--- /dev/null
+++ b/net/wireguard/crypto/poly1305-arm64.S
@@ -0,0 +1,820 @@
+/* SPDX-License-Identifier: OpenSSL OR (BSD-3-Clause OR GPL-2.0)
+ *
+ * Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ * Copyright 2016 The OpenSSL Project Authors. All Rights Reserved.
+ */
+
+#include <linux/linkage.h>
+.text
+
+.align	5
+ENTRY(poly1305_init_arm)
+	cmp	x1,xzr
+	stp	xzr,xzr,[x0]		// zero hash value
+	stp	xzr,xzr,[x0,#16]	// [along with is_base2_26]
+
+	csel	x0,xzr,x0,eq
+	b.eq	.Lno_key
+
+	ldp	x7,x8,[x1]		// load key
+	mov	x9,#0xfffffffc0fffffff
+	movk	x9,#0x0fff,lsl#48
+#ifdef	__ARMEB__
+	rev	x7,x7			// flip bytes
+	rev	x8,x8
+#endif
+	and	x7,x7,x9		// &=0ffffffc0fffffff
+	and	x9,x9,#-4
+	and	x8,x8,x9		// &=0ffffffc0ffffffc
+	stp	x7,x8,[x0,#32]	// save key value
+
+.Lno_key:
+	ret
+ENDPROC(poly1305_init_arm)
+
+.align	5
+ENTRY(poly1305_blocks_arm)
+	ands	x2,x2,#-16
+	b.eq	.Lno_data
+
+	ldp	x4,x5,[x0]		// load hash value
+	ldp	x7,x8,[x0,#32]	// load key value
+	ldr	x6,[x0,#16]
+	add	x9,x8,x8,lsr#2	// s1 = r1 + (r1 >> 2)
+	b	.Loop
+
+.align	5
+.Loop:
+	ldp	x10,x11,[x1],#16	// load input
+	sub	x2,x2,#16
+#ifdef	__ARMEB__
+	rev	x10,x10
+	rev	x11,x11
+#endif
+	adds	x4,x4,x10		// accumulate input
+	adcs	x5,x5,x11
+
+	mul	x12,x4,x7		// h0*r0
+	adc	x6,x6,x3
+	umulh	x13,x4,x7
+
+	mul	x10,x5,x9		// h1*5*r1
+	umulh	x11,x5,x9
+
+	adds	x12,x12,x10
+	mul	x10,x4,x8		// h0*r1
+	adc	x13,x13,x11
+	umulh	x14,x4,x8
+
+	adds	x13,x13,x10
+	mul	x10,x5,x7		// h1*r0
+	adc	x14,x14,xzr
+	umulh	x11,x5,x7
+
+	adds	x13,x13,x10
+	mul	x10,x6,x9		// h2*5*r1
+	adc	x14,x14,x11
+	mul	x11,x6,x7		// h2*r0
+
+	adds	x13,x13,x10
+	adc	x14,x14,x11
+
+	and	x10,x14,#-4		// final reduction
+	and	x6,x14,#3
+	add	x10,x10,x14,lsr#2
+	adds	x4,x12,x10
+	adcs	x5,x13,xzr
+	adc	x6,x6,xzr
+
+	cbnz	x2,.Loop
+
+	stp	x4,x5,[x0]		// store hash value
+	str	x6,[x0,#16]
+
+.Lno_data:
+	ret
+ENDPROC(poly1305_blocks_arm)
+
+.align	5
+ENTRY(poly1305_emit_arm)
+	ldp	x4,x5,[x0]		// load hash base 2^64
+	ldr	x6,[x0,#16]
+	ldp	x10,x11,[x2]	// load nonce
+
+	adds	x12,x4,#5		// compare to modulus
+	adcs	x13,x5,xzr
+	adc	x14,x6,xzr
+
+	tst	x14,#-4			// see if it's carried/borrowed
+
+	csel	x4,x4,x12,eq
+	csel	x5,x5,x13,eq
+
+#ifdef	__ARMEB__
+	ror	x10,x10,#32		// flip nonce words
+	ror	x11,x11,#32
+#endif
+	adds	x4,x4,x10		// accumulate nonce
+	adc	x5,x5,x11
+#ifdef	__ARMEB__
+	rev	x4,x4			// flip output bytes
+	rev	x5,x5
+#endif
+	stp	x4,x5,[x1]		// write result
+
+	ret
+ENDPROC(poly1305_emit_arm)
+
+.align	5
+__poly1305_mult:
+	mul	x12,x4,x7		// h0*r0
+	umulh	x13,x4,x7
+
+	mul	x10,x5,x9		// h1*5*r1
+	umulh	x11,x5,x9
+
+	adds	x12,x12,x10
+	mul	x10,x4,x8		// h0*r1
+	adc	x13,x13,x11
+	umulh	x14,x4,x8
+
+	adds	x13,x13,x10
+	mul	x10,x5,x7		// h1*r0
+	adc	x14,x14,xzr
+	umulh	x11,x5,x7
+
+	adds	x13,x13,x10
+	mul	x10,x6,x9		// h2*5*r1
+	adc	x14,x14,x11
+	mul	x11,x6,x7		// h2*r0
+
+	adds	x13,x13,x10
+	adc	x14,x14,x11
+
+	and	x10,x14,#-4		// final reduction
+	and	x6,x14,#3
+	add	x10,x10,x14,lsr#2
+	adds	x4,x12,x10
+	adcs	x5,x13,xzr
+	adc	x6,x6,xzr
+
+	ret
+
+__poly1305_splat:
+	and	x12,x4,#0x03ffffff	// base 2^64 -> base 2^26
+	ubfx	x13,x4,#26,#26
+	extr	x14,x5,x4,#52
+	and	x14,x14,#0x03ffffff
+	ubfx	x15,x5,#14,#26
+	extr	x16,x6,x5,#40
+
+	str	w12,[x0,#16*0]	// r0
+	add	w12,w13,w13,lsl#2	// r1*5
+	str	w13,[x0,#16*1]	// r1
+	add	w13,w14,w14,lsl#2	// r2*5
+	str	w12,[x0,#16*2]	// s1
+	str	w14,[x0,#16*3]	// r2
+	add	w14,w15,w15,lsl#2	// r3*5
+	str	w13,[x0,#16*4]	// s2
+	str	w15,[x0,#16*5]	// r3
+	add	w15,w16,w16,lsl#2	// r4*5
+	str	w14,[x0,#16*6]	// s3
+	str	w16,[x0,#16*7]	// r4
+	str	w15,[x0,#16*8]	// s4
+
+	ret
+
+.align	5
+ENTRY(poly1305_blocks_neon)
+	ldr	x17,[x0,#24]
+	cmp	x2,#128
+	b.hs	.Lblocks_neon
+	cbz	x17,poly1305_blocks_arm
+
+.Lblocks_neon:
+	stp	x29,x30,[sp,#-80]!
+	add	x29,sp,#0
+
+	ands	x2,x2,#-16
+	b.eq	.Lno_data_neon
+
+	cbz	x17,.Lbase2_64_neon
+
+	ldp	w10,w11,[x0]		// load hash value base 2^26
+	ldp	w12,w13,[x0,#8]
+	ldr	w14,[x0,#16]
+
+	tst	x2,#31
+	b.eq	.Leven_neon
+
+	ldp	x7,x8,[x0,#32]	// load key value
+
+	add	x4,x10,x11,lsl#26	// base 2^26 -> base 2^64
+	lsr	x5,x12,#12
+	adds	x4,x4,x12,lsl#52
+	add	x5,x5,x13,lsl#14
+	adc	x5,x5,xzr
+	lsr	x6,x14,#24
+	adds	x5,x5,x14,lsl#40
+	adc	x14,x6,xzr		// can be partially reduced...
+
+	ldp	x12,x13,[x1],#16	// load input
+	sub	x2,x2,#16
+	add	x9,x8,x8,lsr#2	// s1 = r1 + (r1 >> 2)
+
+	and	x10,x14,#-4		// ... so reduce
+	and	x6,x14,#3
+	add	x10,x10,x14,lsr#2
+	adds	x4,x4,x10
+	adcs	x5,x5,xzr
+	adc	x6,x6,xzr
+
+#ifdef	__ARMEB__
+	rev	x12,x12
+	rev	x13,x13
+#endif
+	adds	x4,x4,x12		// accumulate input
+	adcs	x5,x5,x13
+	adc	x6,x6,x3
+
+	bl	__poly1305_mult
+	ldr	x30,[sp,#8]
+
+	cbz	x3,.Lstore_base2_64_neon
+
+	and	x10,x4,#0x03ffffff	// base 2^64 -> base 2^26
+	ubfx	x11,x4,#26,#26
+	extr	x12,x5,x4,#52
+	and	x12,x12,#0x03ffffff
+	ubfx	x13,x5,#14,#26
+	extr	x14,x6,x5,#40
+
+	cbnz	x2,.Leven_neon
+
+	stp	w10,w11,[x0]		// store hash value base 2^26
+	stp	w12,w13,[x0,#8]
+	str	w14,[x0,#16]
+	b	.Lno_data_neon
+
+.align	4
+.Lstore_base2_64_neon:
+	stp	x4,x5,[x0]		// store hash value base 2^64
+	stp	x6,xzr,[x0,#16]	// note that is_base2_26 is zeroed
+	b	.Lno_data_neon
+
+.align	4
+.Lbase2_64_neon:
+	ldp	x7,x8,[x0,#32]	// load key value
+
+	ldp	x4,x5,[x0]		// load hash value base 2^64
+	ldr	x6,[x0,#16]
+
+	tst	x2,#31
+	b.eq	.Linit_neon
+
+	ldp	x12,x13,[x1],#16	// load input
+	sub	x2,x2,#16
+	add	x9,x8,x8,lsr#2	// s1 = r1 + (r1 >> 2)
+#ifdef	__ARMEB__
+	rev	x12,x12
+	rev	x13,x13
+#endif
+	adds	x4,x4,x12		// accumulate input
+	adcs	x5,x5,x13
+	adc	x6,x6,x3
+
+	bl	__poly1305_mult
+
+.Linit_neon:
+	and	x10,x4,#0x03ffffff	// base 2^64 -> base 2^26
+	ubfx	x11,x4,#26,#26
+	extr	x12,x5,x4,#52
+	and	x12,x12,#0x03ffffff
+	ubfx	x13,x5,#14,#26
+	extr	x14,x6,x5,#40
+
+	stp	d8,d9,[sp,#16]		// meet ABI requirements
+	stp	d10,d11,[sp,#32]
+	stp	d12,d13,[sp,#48]
+	stp	d14,d15,[sp,#64]
+
+	fmov	d24,x10
+	fmov	d25,x11
+	fmov	d26,x12
+	fmov	d27,x13
+	fmov	d28,x14
+
+	////////////////////////////////// initialize r^n table
+	mov	x4,x7			// r^1
+	add	x9,x8,x8,lsr#2	// s1 = r1 + (r1 >> 2)
+	mov	x5,x8
+	mov	x6,xzr
+	add	x0,x0,#48+12
+	bl	__poly1305_splat
+
+	bl	__poly1305_mult		// r^2
+	sub	x0,x0,#4
+	bl	__poly1305_splat
+
+	bl	__poly1305_mult		// r^3
+	sub	x0,x0,#4
+	bl	__poly1305_splat
+
+	bl	__poly1305_mult		// r^4
+	sub	x0,x0,#4
+	bl	__poly1305_splat
+	ldr	x30,[sp,#8]
+
+	add	x16,x1,#32
+	adr	x17,.Lzeros
+	subs	x2,x2,#64
+	csel	x16,x17,x16,lo
+
+	mov	x4,#1
+	str	x4,[x0,#-24]		// set is_base2_26
+	sub	x0,x0,#48		// restore original x0
+	b	.Ldo_neon
+
+.align	4
+.Leven_neon:
+	add	x16,x1,#32
+	adr	x17,.Lzeros
+	subs	x2,x2,#64
+	csel	x16,x17,x16,lo
+
+	stp	d8,d9,[sp,#16]		// meet ABI requirements
+	stp	d10,d11,[sp,#32]
+	stp	d12,d13,[sp,#48]
+	stp	d14,d15,[sp,#64]
+
+	fmov	d24,x10
+	fmov	d25,x11
+	fmov	d26,x12
+	fmov	d27,x13
+	fmov	d28,x14
+
+.Ldo_neon:
+	ldp	x8,x12,[x16],#16	// inp[2:3] (or zero)
+	ldp	x9,x13,[x16],#48
+
+	lsl	x3,x3,#24
+	add	x15,x0,#48
+
+#ifdef	__ARMEB__
+	rev	x8,x8
+	rev	x12,x12
+	rev	x9,x9
+	rev	x13,x13
+#endif
+	and	x4,x8,#0x03ffffff	// base 2^64 -> base 2^26
+	and	x5,x9,#0x03ffffff
+	ubfx	x6,x8,#26,#26
+	ubfx	x7,x9,#26,#26
+	add	x4,x4,x5,lsl#32		// bfi	x4,x5,#32,#32
+	extr	x8,x12,x8,#52
+	extr	x9,x13,x9,#52
+	add	x6,x6,x7,lsl#32		// bfi	x6,x7,#32,#32
+	fmov	d14,x4
+	and	x8,x8,#0x03ffffff
+	and	x9,x9,#0x03ffffff
+	ubfx	x10,x12,#14,#26
+	ubfx	x11,x13,#14,#26
+	add	x12,x3,x12,lsr#40
+	add	x13,x3,x13,lsr#40
+	add	x8,x8,x9,lsl#32		// bfi	x8,x9,#32,#32
+	fmov	d15,x6
+	add	x10,x10,x11,lsl#32	// bfi	x10,x11,#32,#32
+	add	x12,x12,x13,lsl#32	// bfi	x12,x13,#32,#32
+	fmov	d16,x8
+	fmov	d17,x10
+	fmov	d18,x12
+
+	ldp	x8,x12,[x1],#16	// inp[0:1]
+	ldp	x9,x13,[x1],#48
+
+	ld1	{v0.4s,v1.4s,v2.4s,v3.4s},[x15],#64
+	ld1	{v4.4s,v5.4s,v6.4s,v7.4s},[x15],#64
+	ld1	{v8.4s},[x15]
+
+#ifdef	__ARMEB__
+	rev	x8,x8
+	rev	x12,x12
+	rev	x9,x9
+	rev	x13,x13
+#endif
+	and	x4,x8,#0x03ffffff	// base 2^64 -> base 2^26
+	and	x5,x9,#0x03ffffff
+	ubfx	x6,x8,#26,#26
+	ubfx	x7,x9,#26,#26
+	add	x4,x4,x5,lsl#32		// bfi	x4,x5,#32,#32
+	extr	x8,x12,x8,#52
+	extr	x9,x13,x9,#52
+	add	x6,x6,x7,lsl#32		// bfi	x6,x7,#32,#32
+	fmov	d9,x4
+	and	x8,x8,#0x03ffffff
+	and	x9,x9,#0x03ffffff
+	ubfx	x10,x12,#14,#26
+	ubfx	x11,x13,#14,#26
+	add	x12,x3,x12,lsr#40
+	add	x13,x3,x13,lsr#40
+	add	x8,x8,x9,lsl#32		// bfi	x8,x9,#32,#32
+	fmov	d10,x6
+	add	x10,x10,x11,lsl#32	// bfi	x10,x11,#32,#32
+	add	x12,x12,x13,lsl#32	// bfi	x12,x13,#32,#32
+	movi	v31.2d,#-1
+	fmov	d11,x8
+	fmov	d12,x10
+	fmov	d13,x12
+	ushr	v31.2d,v31.2d,#38
+
+	b.ls	.Lskip_loop
+
+.align	4
+.Loop_neon:
+	////////////////////////////////////////////////////////////////
+	// ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2
+	// ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^3+inp[7]*r
+	//   ___________________/
+	// ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2+inp[8])*r^2
+	// ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^4+inp[7]*r^2+inp[9])*r
+	//   ___________________/ ____________________/
+	//
+	// Note that we start with inp[2:3]*r^2. This is because it
+	// doesn't depend on reduction in previous iteration.
+	////////////////////////////////////////////////////////////////
+	// d4 = h0*r4 + h1*r3   + h2*r2   + h3*r1   + h4*r0
+	// d3 = h0*r3 + h1*r2   + h2*r1   + h3*r0   + h4*5*r4
+	// d2 = h0*r2 + h1*r1   + h2*r0   + h3*5*r4 + h4*5*r3
+	// d1 = h0*r1 + h1*r0   + h2*5*r4 + h3*5*r3 + h4*5*r2
+	// d0 = h0*r0 + h1*5*r4 + h2*5*r3 + h3*5*r2 + h4*5*r1
+
+	subs	x2,x2,#64
+	umull	v23.2d,v14.2s,v7.s[2]
+	csel	x16,x17,x16,lo
+	umull	v22.2d,v14.2s,v5.s[2]
+	umull	v21.2d,v14.2s,v3.s[2]
+	ldp	x8,x12,[x16],#16	// inp[2:3] (or zero)
+	umull	v20.2d,v14.2s,v1.s[2]
+	ldp	x9,x13,[x16],#48
+	umull	v19.2d,v14.2s,v0.s[2]
+#ifdef	__ARMEB__
+	rev	x8,x8
+	rev	x12,x12
+	rev	x9,x9
+	rev	x13,x13
+#endif
+
+	umlal	v23.2d,v15.2s,v5.s[2]
+	and	x4,x8,#0x03ffffff	// base 2^64 -> base 2^26
+	umlal	v22.2d,v15.2s,v3.s[2]
+	and	x5,x9,#0x03ffffff
+	umlal	v21.2d,v15.2s,v1.s[2]
+	ubfx	x6,x8,#26,#26
+	umlal	v20.2d,v15.2s,v0.s[2]
+	ubfx	x7,x9,#26,#26
+	umlal	v19.2d,v15.2s,v8.s[2]
+	add	x4,x4,x5,lsl#32		// bfi	x4,x5,#32,#32
+
+	umlal	v23.2d,v16.2s,v3.s[2]
+	extr	x8,x12,x8,#52
+	umlal	v22.2d,v16.2s,v1.s[2]
+	extr	x9,x13,x9,#52
+	umlal	v21.2d,v16.2s,v0.s[2]
+	add	x6,x6,x7,lsl#32		// bfi	x6,x7,#32,#32
+	umlal	v20.2d,v16.2s,v8.s[2]
+	fmov	d14,x4
+	umlal	v19.2d,v16.2s,v6.s[2]
+	and	x8,x8,#0x03ffffff
+
+	umlal	v23.2d,v17.2s,v1.s[2]
+	and	x9,x9,#0x03ffffff
+	umlal	v22.2d,v17.2s,v0.s[2]
+	ubfx	x10,x12,#14,#26
+	umlal	v21.2d,v17.2s,v8.s[2]
+	ubfx	x11,x13,#14,#26
+	umlal	v20.2d,v17.2s,v6.s[2]
+	add	x8,x8,x9,lsl#32		// bfi	x8,x9,#32,#32
+	umlal	v19.2d,v17.2s,v4.s[2]
+	fmov	d15,x6
+
+	add	v11.2s,v11.2s,v26.2s
+	add	x12,x3,x12,lsr#40
+	umlal	v23.2d,v18.2s,v0.s[2]
+	add	x13,x3,x13,lsr#40
+	umlal	v22.2d,v18.2s,v8.s[2]
+	add	x10,x10,x11,lsl#32	// bfi	x10,x11,#32,#32
+	umlal	v21.2d,v18.2s,v6.s[2]
+	add	x12,x12,x13,lsl#32	// bfi	x12,x13,#32,#32
+	umlal	v20.2d,v18.2s,v4.s[2]
+	fmov	d16,x8
+	umlal	v19.2d,v18.2s,v2.s[2]
+	fmov	d17,x10
+
+	////////////////////////////////////////////////////////////////
+	// (hash+inp[0:1])*r^4 and accumulate
+
+	add	v9.2s,v9.2s,v24.2s
+	fmov	d18,x12
+	umlal	v22.2d,v11.2s,v1.s[0]
+	ldp	x8,x12,[x1],#16	// inp[0:1]
+	umlal	v19.2d,v11.2s,v6.s[0]
+	ldp	x9,x13,[x1],#48
+	umlal	v23.2d,v11.2s,v3.s[0]
+	umlal	v20.2d,v11.2s,v8.s[0]
+	umlal	v21.2d,v11.2s,v0.s[0]
+#ifdef	__ARMEB__
+	rev	x8,x8
+	rev	x12,x12
+	rev	x9,x9
+	rev	x13,x13
+#endif
+
+	add	v10.2s,v10.2s,v25.2s
+	umlal	v22.2d,v9.2s,v5.s[0]
+	umlal	v23.2d,v9.2s,v7.s[0]
+	and	x4,x8,#0x03ffffff	// base 2^64 -> base 2^26
+	umlal	v21.2d,v9.2s,v3.s[0]
+	and	x5,x9,#0x03ffffff
+	umlal	v19.2d,v9.2s,v0.s[0]
+	ubfx	x6,x8,#26,#26
+	umlal	v20.2d,v9.2s,v1.s[0]
+	ubfx	x7,x9,#26,#26
+
+	add	v12.2s,v12.2s,v27.2s
+	add	x4,x4,x5,lsl#32		// bfi	x4,x5,#32,#32
+	umlal	v22.2d,v10.2s,v3.s[0]
+	extr	x8,x12,x8,#52
+	umlal	v23.2d,v10.2s,v5.s[0]
+	extr	x9,x13,x9,#52
+	umlal	v19.2d,v10.2s,v8.s[0]
+	add	x6,x6,x7,lsl#32		// bfi	x6,x7,#32,#32
+	umlal	v21.2d,v10.2s,v1.s[0]
+	fmov	d9,x4
+	umlal	v20.2d,v10.2s,v0.s[0]
+	and	x8,x8,#0x03ffffff
+
+	add	v13.2s,v13.2s,v28.2s
+	and	x9,x9,#0x03ffffff
+	umlal	v22.2d,v12.2s,v0.s[0]
+	ubfx	x10,x12,#14,#26
+	umlal	v19.2d,v12.2s,v4.s[0]
+	ubfx	x11,x13,#14,#26
+	umlal	v23.2d,v12.2s,v1.s[0]
+	add	x8,x8,x9,lsl#32		// bfi	x8,x9,#32,#32
+	umlal	v20.2d,v12.2s,v6.s[0]
+	fmov	d10,x6
+	umlal	v21.2d,v12.2s,v8.s[0]
+	add	x12,x3,x12,lsr#40
+
+	umlal	v22.2d,v13.2s,v8.s[0]
+	add	x13,x3,x13,lsr#40
+	umlal	v19.2d,v13.2s,v2.s[0]
+	add	x10,x10,x11,lsl#32	// bfi	x10,x11,#32,#32
+	umlal	v23.2d,v13.2s,v0.s[0]
+	add	x12,x12,x13,lsl#32	// bfi	x12,x13,#32,#32
+	umlal	v20.2d,v13.2s,v4.s[0]
+	fmov	d11,x8
+	umlal	v21.2d,v13.2s,v6.s[0]
+	fmov	d12,x10
+	fmov	d13,x12
+
+	/////////////////////////////////////////////////////////////////
+	// lazy reduction as discussed in "NEON crypto" by D.J. Bernstein
+	// and P. Schwabe
+	//
+	// [see discussion in poly1305-armv4 module]
+
+	ushr	v29.2d,v22.2d,#26
+	xtn	v27.2s,v22.2d
+	ushr	v30.2d,v19.2d,#26
+	and	v19.16b,v19.16b,v31.16b
+	add	v23.2d,v23.2d,v29.2d	// h3 -> h4
+	bic	v27.2s,#0xfc,lsl#24	// &=0x03ffffff
+	add	v20.2d,v20.2d,v30.2d	// h0 -> h1
+
+	ushr	v29.2d,v23.2d,#26
+	xtn	v28.2s,v23.2d
+	ushr	v30.2d,v20.2d,#26
+	xtn	v25.2s,v20.2d
+	bic	v28.2s,#0xfc,lsl#24
+	add	v21.2d,v21.2d,v30.2d	// h1 -> h2
+
+	add	v19.2d,v19.2d,v29.2d
+	shl	v29.2d,v29.2d,#2
+	shrn	v30.2s,v21.2d,#26
+	xtn	v26.2s,v21.2d
+	add	v19.2d,v19.2d,v29.2d	// h4 -> h0
+	bic	v25.2s,#0xfc,lsl#24
+	add	v27.2s,v27.2s,v30.2s		// h2 -> h3
+	bic	v26.2s,#0xfc,lsl#24
+
+	shrn	v29.2s,v19.2d,#26
+	xtn	v24.2s,v19.2d
+	ushr	v30.2s,v27.2s,#26
+	bic	v27.2s,#0xfc,lsl#24
+	bic	v24.2s,#0xfc,lsl#24
+	add	v25.2s,v25.2s,v29.2s		// h0 -> h1
+	add	v28.2s,v28.2s,v30.2s		// h3 -> h4
+
+	b.hi	.Loop_neon
+
+.Lskip_loop:
+	dup	v16.2d,v16.d[0]
+	add	v11.2s,v11.2s,v26.2s
+
+	////////////////////////////////////////////////////////////////
+	// multiply (inp[0:1]+hash) or inp[2:3] by r^2:r^1
+
+	adds	x2,x2,#32
+	b.ne	.Long_tail
+
+	dup	v16.2d,v11.d[0]
+	add	v14.2s,v9.2s,v24.2s
+	add	v17.2s,v12.2s,v27.2s
+	add	v15.2s,v10.2s,v25.2s
+	add	v18.2s,v13.2s,v28.2s
+
+.Long_tail:
+	dup	v14.2d,v14.d[0]
+	umull2	v19.2d,v16.4s,v6.4s
+	umull2	v22.2d,v16.4s,v1.4s
+	umull2	v23.2d,v16.4s,v3.4s
+	umull2	v21.2d,v16.4s,v0.4s
+	umull2	v20.2d,v16.4s,v8.4s
+
+	dup	v15.2d,v15.d[0]
+	umlal2	v19.2d,v14.4s,v0.4s
+	umlal2	v21.2d,v14.4s,v3.4s
+	umlal2	v22.2d,v14.4s,v5.4s
+	umlal2	v23.2d,v14.4s,v7.4s
+	umlal2	v20.2d,v14.4s,v1.4s
+
+	dup	v17.2d,v17.d[0]
+	umlal2	v19.2d,v15.4s,v8.4s
+	umlal2	v22.2d,v15.4s,v3.4s
+	umlal2	v21.2d,v15.4s,v1.4s
+	umlal2	v23.2d,v15.4s,v5.4s
+	umlal2	v20.2d,v15.4s,v0.4s
+
+	dup	v18.2d,v18.d[0]
+	umlal2	v22.2d,v17.4s,v0.4s
+	umlal2	v23.2d,v17.4s,v1.4s
+	umlal2	v19.2d,v17.4s,v4.4s
+	umlal2	v20.2d,v17.4s,v6.4s
+	umlal2	v21.2d,v17.4s,v8.4s
+
+	umlal2	v22.2d,v18.4s,v8.4s
+	umlal2	v19.2d,v18.4s,v2.4s
+	umlal2	v23.2d,v18.4s,v0.4s
+	umlal2	v20.2d,v18.4s,v4.4s
+	umlal2	v21.2d,v18.4s,v6.4s
+
+	b.eq	.Lshort_tail
+
+	////////////////////////////////////////////////////////////////
+	// (hash+inp[0:1])*r^4:r^3 and accumulate
+
+	add	v9.2s,v9.2s,v24.2s
+	umlal	v22.2d,v11.2s,v1.2s
+	umlal	v19.2d,v11.2s,v6.2s
+	umlal	v23.2d,v11.2s,v3.2s
+	umlal	v20.2d,v11.2s,v8.2s
+	umlal	v21.2d,v11.2s,v0.2s
+
+	add	v10.2s,v10.2s,v25.2s
+	umlal	v22.2d,v9.2s,v5.2s
+	umlal	v19.2d,v9.2s,v0.2s
+	umlal	v23.2d,v9.2s,v7.2s
+	umlal	v20.2d,v9.2s,v1.2s
+	umlal	v21.2d,v9.2s,v3.2s
+
+	add	v12.2s,v12.2s,v27.2s
+	umlal	v22.2d,v10.2s,v3.2s
+	umlal	v19.2d,v10.2s,v8.2s
+	umlal	v23.2d,v10.2s,v5.2s
+	umlal	v20.2d,v10.2s,v0.2s
+	umlal	v21.2d,v10.2s,v1.2s
+
+	add	v13.2s,v13.2s,v28.2s
+	umlal	v22.2d,v12.2s,v0.2s
+	umlal	v19.2d,v12.2s,v4.2s
+	umlal	v23.2d,v12.2s,v1.2s
+	umlal	v20.2d,v12.2s,v6.2s
+	umlal	v21.2d,v12.2s,v8.2s
+
+	umlal	v22.2d,v13.2s,v8.2s
+	umlal	v19.2d,v13.2s,v2.2s
+	umlal	v23.2d,v13.2s,v0.2s
+	umlal	v20.2d,v13.2s,v4.2s
+	umlal	v21.2d,v13.2s,v6.2s
+
+.Lshort_tail:
+	////////////////////////////////////////////////////////////////
+	// horizontal add
+
+	addp	v22.2d,v22.2d,v22.2d
+	ldp	d8,d9,[sp,#16]		// meet ABI requirements
+	addp	v19.2d,v19.2d,v19.2d
+	ldp	d10,d11,[sp,#32]
+	addp	v23.2d,v23.2d,v23.2d
+	ldp	d12,d13,[sp,#48]
+	addp	v20.2d,v20.2d,v20.2d
+	ldp	d14,d15,[sp,#64]
+	addp	v21.2d,v21.2d,v21.2d
+
+	////////////////////////////////////////////////////////////////
+	// lazy reduction, but without narrowing
+
+	ushr	v29.2d,v22.2d,#26
+	and	v22.16b,v22.16b,v31.16b
+	ushr	v30.2d,v19.2d,#26
+	and	v19.16b,v19.16b,v31.16b
+
+	add	v23.2d,v23.2d,v29.2d	// h3 -> h4
+	add	v20.2d,v20.2d,v30.2d	// h0 -> h1
+
+	ushr	v29.2d,v23.2d,#26
+	and	v23.16b,v23.16b,v31.16b
+	ushr	v30.2d,v20.2d,#26
+	and	v20.16b,v20.16b,v31.16b
+	add	v21.2d,v21.2d,v30.2d	// h1 -> h2
+
+	add	v19.2d,v19.2d,v29.2d
+	shl	v29.2d,v29.2d,#2
+	ushr	v30.2d,v21.2d,#26
+	and	v21.16b,v21.16b,v31.16b
+	add	v19.2d,v19.2d,v29.2d	// h4 -> h0
+	add	v22.2d,v22.2d,v30.2d	// h2 -> h3
+
+	ushr	v29.2d,v19.2d,#26
+	and	v19.16b,v19.16b,v31.16b
+	ushr	v30.2d,v22.2d,#26
+	and	v22.16b,v22.16b,v31.16b
+	add	v20.2d,v20.2d,v29.2d	// h0 -> h1
+	add	v23.2d,v23.2d,v30.2d	// h3 -> h4
+
+	////////////////////////////////////////////////////////////////
+	// write the result, can be partially reduced
+
+	st4	{v19.s,v20.s,v21.s,v22.s}[0],[x0],#16
+	st1	{v23.s}[0],[x0]
+
+.Lno_data_neon:
+	ldr	x29,[sp],#80
+	ret
+ENDPROC(poly1305_blocks_neon)
+
+.align	5
+ENTRY(poly1305_emit_neon)
+	ldr	x17,[x0,#24]
+	cbz	x17,poly1305_emit_arm
+
+	ldp	w10,w11,[x0]		// load hash value base 2^26
+	ldp	w12,w13,[x0,#8]
+	ldr	w14,[x0,#16]
+
+	add	x4,x10,x11,lsl#26	// base 2^26 -> base 2^64
+	lsr	x5,x12,#12
+	adds	x4,x4,x12,lsl#52
+	add	x5,x5,x13,lsl#14
+	adc	x5,x5,xzr
+	lsr	x6,x14,#24
+	adds	x5,x5,x14,lsl#40
+	adc	x6,x6,xzr		// can be partially reduced...
+
+	ldp	x10,x11,[x2]	// load nonce
+
+	and	x12,x6,#-4		// ... so reduce
+	add	x12,x12,x6,lsr#2
+	and	x6,x6,#3
+	adds	x4,x4,x12
+	adcs	x5,x5,xzr
+	adc	x6,x6,xzr
+
+	adds	x12,x4,#5		// compare to modulus
+	adcs	x13,x5,xzr
+	adc	x14,x6,xzr
+
+	tst	x14,#-4			// see if it's carried/borrowed
+
+	csel	x4,x4,x12,eq
+	csel	x5,x5,x13,eq
+
+#ifdef	__ARMEB__
+	ror	x10,x10,#32		// flip nonce words
+	ror	x11,x11,#32
+#endif
+	adds	x4,x4,x10		// accumulate nonce
+	adc	x5,x5,x11
+#ifdef	__ARMEB__
+	rev	x4,x4			// flip output bytes
+	rev	x5,x5
+#endif
+	stp	x4,x5,[x1]		// write result
+
+	ret
+ENDPROC(poly1305_emit_neon)
+
+.align	5
+.Lzeros:
+.long	0,0,0,0,0,0,0,0
diff --git b/net/wireguard/crypto/poly1305-mips64.S b/net/wireguard/crypto/poly1305-mips64.S
new file mode 100644
index 0000000..26a9a6a
--- /dev/null
+++ b/net/wireguard/crypto/poly1305-mips64.S
@@ -0,0 +1,357 @@
+/* SPDX-License-Identifier: OpenSSL OR (BSD-3-Clause OR GPL-2.0)
+ *
+ * Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ * Copyright 2016 The OpenSSL Project Authors. All Rights Reserved.
+ */
+
+#if !defined(CONFIG_64BIT)
+#error "This is only for 64-bit kernels."
+#endif
+
+#ifdef __MIPSEB__
+#define MSB 0
+#define LSB 7
+#else
+#define MSB 7
+#define LSB 0
+#endif
+
+#if defined(CONFIG_CPU_MIPS64_R6) || defined(CONFIG_CPU_MIPSR6)
+#define dmultu(rs,rt)
+#define mflo(rd,rs,rt)	dmulu	rd,rs,rt
+#define mfhi(rd,rs,rt)	dmuhu	rd,rs,rt
+#else
+#define dmultu(rs,rt)		dmultu	rs,rt
+#define multu(rs,rt)		multu	rs,rt
+#define mflo(rd,rs,rt)	mflo	rd
+#define mfhi(rd,rs,rt)	mfhi	rd
+#endif
+
+.text
+.set	noat
+.set	noreorder
+
+/* While most of the assembly in the kernel prefers ENTRY() and ENDPROC(),
+ * there is no existing MIPS assembly that uses it, and MIPS assembler seems
+ * to like its own .ent/.end notation, which the MIPS include files don't
+ * provide in a MIPS-specific ENTRY/ENDPROC definition. So, we skip these
+ * for now, until somebody complains. */
+
+.align	5
+.globl	poly1305_init_mips
+.ent	poly1305_init_mips
+poly1305_init_mips:
+	.frame	$29,0,$31
+	.set	reorder
+
+	sd	$0,0($4)
+	sd	$0,8($4)
+	sd	$0,16($4)
+
+	beqz	$5,.Lno_key
+
+#if defined(CONFIG_CPU_MIPS64_R6) || defined(CONFIG_CPU_MIPSR6)
+	ld	$8,0($5)
+	ld	$9,8($5)
+#else
+	ldl	$8,0+MSB($5)
+	ldl	$9,8+MSB($5)
+	ldr	$8,0+LSB($5)
+	ldr	$9,8+LSB($5)
+#endif
+#ifdef	__MIPSEB__
+#if defined(CONFIG_CPU_MIPS64_R2) || defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPS64_R6) || defined(CONFIG_CPU_MIPSR6)
+	dsbh	$8,$8		# byte swap
+	 dsbh	$9,$9
+	dshd	$8,$8
+	 dshd	$9,$9
+#else
+	ori	$10,$0,0xFF
+	dsll	$1,$10,32
+	or	$10,$1		# 0x000000FF000000FF
+
+	and	$11,$8,$10	# byte swap
+	 and	$2,$9,$10
+	dsrl	$1,$8,24
+	 dsrl	$24,$9,24
+	dsll	$11,24
+	 dsll	$2,24
+	and	$1,$10
+	 and	$24,$10
+	dsll	$10,8			# 0x0000FF000000FF00
+	or	$11,$1
+	 or	$2,$24
+	and	$1,$8,$10
+	 and	$24,$9,$10
+	dsrl	$8,8
+	 dsrl	$9,8
+	dsll	$1,8
+	 dsll	$24,8
+	and	$8,$10
+	 and	$9,$10
+	or	$11,$1
+	 or	$2,$24
+	or	$8,$11
+	 or	$9,$2
+	dsrl	$11,$8,32
+	 dsrl	$2,$9,32
+	dsll	$8,32
+	 dsll	$9,32
+	or	$8,$11
+	 or	$9,$2
+#endif
+#endif
+	li	$10,1
+	dsll	$10,32
+	daddiu	$10,-63
+	dsll	$10,28
+	daddiu	$10,-1		# 0ffffffc0fffffff
+
+	and	$8,$10
+	daddiu	$10,-3		# 0ffffffc0ffffffc
+	and	$9,$10
+
+	sd	$8,24($4)
+	dsrl	$10,$9,2
+	sd	$9,32($4)
+	daddu	$10,$9		# s1 = r1 + (r1 >> 2)
+	sd	$10,40($4)
+
+.Lno_key:
+	li	$2,0			# return 0
+	jr	$31
+.end	poly1305_init_mips
+
+.align	5
+.globl	poly1305_blocks_mips
+.ent	poly1305_blocks_mips
+poly1305_blocks_mips:
+	.set	noreorder
+	dsrl	$6,4			# number of complete blocks
+	bnez	$6,poly1305_blocks_internal
+	nop
+	jr	$31
+	nop
+.end	poly1305_blocks_mips
+
+.align	5
+.ent	poly1305_blocks_internal
+poly1305_blocks_internal:
+	.frame	$29,6*8,$31
+	.mask	0x00030000,-8
+	.set	noreorder
+	dsubu	$29,6*8
+	sd	$17,40($29)
+	sd	$16,32($29)
+	.set	reorder
+
+	ld	$12,0($4)		# load hash value
+	ld	$13,8($4)
+	ld	$14,16($4)
+
+	ld	$15,24($4)		# load key
+	ld	$16,32($4)
+	ld	$17,40($4)
+
+.Loop:
+#if defined(CONFIG_CPU_MIPS64_R6) || defined(CONFIG_CPU_MIPSR6)
+	ld	$8,0($5)		# load input
+	ld	$9,8($5)
+#else
+	ldl	$8,0+MSB($5)	# load input
+	ldl	$9,8+MSB($5)
+	ldr	$8,0+LSB($5)
+	ldr	$9,8+LSB($5)
+#endif
+	daddiu	$6,-1
+	daddiu	$5,16
+#ifdef	__MIPSEB__
+#if defined(CONFIG_CPU_MIPS64_R2) || defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPS64_R6) || defined(CONFIG_CPU_MIPSR6)
+	dsbh	$8,$8		# byte swap
+	 dsbh	$9,$9
+	dshd	$8,$8
+	 dshd	$9,$9
+#else
+	ori	$10,$0,0xFF
+	dsll	$1,$10,32
+	or	$10,$1		# 0x000000FF000000FF
+
+	and	$11,$8,$10	# byte swap
+	 and	$2,$9,$10
+	dsrl	$1,$8,24
+	 dsrl	$24,$9,24
+	dsll	$11,24
+	 dsll	$2,24
+	and	$1,$10
+	 and	$24,$10
+	dsll	$10,8			# 0x0000FF000000FF00
+	or	$11,$1
+	 or	$2,$24
+	and	$1,$8,$10
+	 and	$24,$9,$10
+	dsrl	$8,8
+	 dsrl	$9,8
+	dsll	$1,8
+	 dsll	$24,8
+	and	$8,$10
+	 and	$9,$10
+	or	$11,$1
+	 or	$2,$24
+	or	$8,$11
+	 or	$9,$2
+	dsrl	$11,$8,32
+	 dsrl	$2,$9,32
+	dsll	$8,32
+	 dsll	$9,32
+	or	$8,$11
+	 or	$9,$2
+#endif
+#endif
+	daddu	$12,$8		# accumulate input
+	daddu	$13,$9
+	sltu	$10,$12,$8
+	sltu	$11,$13,$9
+	daddu	$13,$10
+
+	dmultu	($15,$12)		# h0*r0
+	 daddu	$14,$7
+	 sltu	$10,$13,$10
+	mflo	($8,$15,$12)
+	mfhi	($9,$15,$12)
+
+	dmultu	($17,$13)		# h1*5*r1
+	 daddu	$10,$11
+	 daddu	$14,$10
+	mflo	($10,$17,$13)
+	mfhi	($11,$17,$13)
+
+	dmultu	($16,$12)		# h0*r1
+	 daddu	$8,$10
+	 daddu	$9,$11
+	mflo	($1,$16,$12)
+	mfhi	($25,$16,$12)
+	 sltu	$10,$8,$10
+	 daddu	$9,$10
+
+	dmultu	($15,$13)		# h1*r0
+	 daddu	$9,$1
+	 sltu	$1,$9,$1
+	mflo	($10,$15,$13)
+	mfhi	($11,$15,$13)
+	 daddu	$25,$1
+
+	dmultu	($17,$14)		# h2*5*r1
+	 daddu	$9,$10
+	 daddu	$25,$11
+	mflo	($1,$17,$14)
+
+	dmultu	($15,$14)		# h2*r0
+	 sltu	$10,$9,$10
+	 daddu	$25,$10
+	mflo	($2,$15,$14)
+
+	daddu	$9,$1
+	daddu	$25,$2
+	sltu	$1,$9,$1
+	daddu	$25,$1
+
+	li	$10,-4		# final reduction
+	and	$10,$25
+	dsrl	$11,$25,2
+	andi	$14,$25,3
+	daddu	$10,$11
+	daddu	$12,$8,$10
+	sltu	$10,$12,$10
+	daddu	$13,$9,$10
+	sltu	$10,$13,$10
+	daddu	$14,$14,$10
+
+	bnez	$6,.Loop
+
+	sd	$12,0($4)		# store hash value
+	sd	$13,8($4)
+	sd	$14,16($4)
+
+	.set	noreorder
+	ld	$17,40($29)		# epilogue
+	ld	$16,32($29)
+	jr	$31
+	daddu	$29,6*8
+.end	poly1305_blocks_internal
+
+.align	5
+.globl	poly1305_emit_mips
+.ent	poly1305_emit_mips
+poly1305_emit_mips:
+	.frame	$29,0,$31
+	.set	reorder
+
+	ld	$10,0($4)
+	ld	$11,8($4)
+	ld	$1,16($4)
+
+	daddiu	$8,$10,5		# compare to modulus
+	sltiu	$2,$8,5
+	daddu	$9,$11,$2
+	sltu	$2,$9,$2
+	daddu	$1,$1,$2
+
+	dsrl	$1,2			# see if it carried/borrowed
+	dsubu	$1,$0,$1
+	nor	$2,$0,$1
+
+	and	$8,$1
+	and	$10,$2
+	and	$9,$1
+	and	$11,$2
+	or	$8,$10
+	or	$9,$11
+
+	lwu	$10,0($6)		# load nonce
+	lwu	$11,4($6)
+	lwu	$1,8($6)
+	lwu	$2,12($6)
+	dsll	$11,32
+	dsll	$2,32
+	or	$10,$11
+	or	$1,$2
+
+	daddu	$8,$10		# accumulate nonce
+	daddu	$9,$1
+	sltu	$10,$8,$10
+	daddu	$9,$10
+
+	dsrl	$10,$8,8		# write mac value
+	dsrl	$11,$8,16
+	dsrl	$1,$8,24
+	sb	$8,0($5)
+	dsrl	$2,$8,32
+	sb	$10,1($5)
+	dsrl	$10,$8,40
+	sb	$11,2($5)
+	dsrl	$11,$8,48
+	sb	$1,3($5)
+	dsrl	$1,$8,56
+	sb	$2,4($5)
+	dsrl	$2,$9,8
+	sb	$10,5($5)
+	dsrl	$10,$9,16
+	sb	$11,6($5)
+	dsrl	$11,$9,24
+	sb	$1,7($5)
+
+	sb	$9,8($5)
+	dsrl	$1,$9,32
+	sb	$2,9($5)
+	dsrl	$2,$9,40
+	sb	$10,10($5)
+	dsrl	$10,$9,48
+	sb	$11,11($5)
+	dsrl	$11,$9,56
+	sb	$1,12($5)
+	sb	$2,13($5)
+	sb	$10,14($5)
+	sb	$11,15($5)
+
+	jr	$31
+.end	poly1305_emit_mips
diff --git b/net/wireguard/crypto/poly1305-x86_64.S b/net/wireguard/crypto/poly1305-x86_64.S
new file mode 100644
index 0000000..347afd8
--- /dev/null
+++ b/net/wireguard/crypto/poly1305-x86_64.S
@@ -0,0 +1,2803 @@
+/* SPDX-License-Identifier: OpenSSL OR (BSD-3-Clause OR GPL-2.0)
+ *
+ * Copyright (C) 2017 Samuel Neves <sneves@dei.uc.pt>. All Rights Reserved.
+ * Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ * Copyright 2016 The OpenSSL Project Authors. All Rights Reserved.
+ */
+
+#include <linux/linkage.h>
+
+.section .rodata.cst32.Lmask24, "aM", @progbits, 32
+.align	64
+.Lconst:
+.Lmask24:
+.long	0x0ffffff,0,0x0ffffff,0,0x0ffffff,0,0x0ffffff,0
+.section .rodata.cst32.L129, "aM", @progbits, 32
+.align	32
+.L129:
+.long	16777216,0,16777216,0,16777216,0,16777216,0
+.section .rodata.cst32.Lmask26, "aM", @progbits, 32
+.align	32
+.Lmask26:
+.long	0x3ffffff,0,0x3ffffff,0,0x3ffffff,0,0x3ffffff,0
+.section .rodata.cst32.Lpermd_avx2, "aM", @progbits, 32
+.align	32
+.Lpermd_avx2:
+.long	2,2,2,3,2,0,2,1
+.section .rodata.cst64.Lpermd_avx512, "aM", @progbits, 64
+.align	64
+.Lpermd_avx512:
+.long	0,0,0,1, 0,2,0,3, 0,4,0,5, 0,6,0,7
+
+.text
+
+.align	32
+ENTRY(poly1305_init_x86_64)
+	xorq	%rax,%rax
+	movq	%rax,0(%rdi)
+	movq	%rax,8(%rdi)
+	movq	%rax,16(%rdi)
+
+	cmpq	$0,%rsi
+	je	.Lno_key
+
+	movq	$0x0ffffffc0fffffff,%rax
+	movq	$0x0ffffffc0ffffffc,%rcx
+	andq	0(%rsi),%rax
+	andq	8(%rsi),%rcx
+	movq	%rax,24(%rdi)
+	movq	%rcx,32(%rdi)
+	movl	$1,%eax
+.Lno_key:
+	ret
+ENDPROC(poly1305_init_x86_64)
+
+.align	32
+ENTRY(poly1305_blocks_x86_64)
+.Lblocks:
+	shrq	$4,%rdx
+	jz	.Lno_data
+
+	pushq	%rbx
+	pushq	%r12
+	pushq	%r13
+	pushq	%r14
+	pushq	%r15
+	pushq	%rdi
+
+.Lblocks_body:
+
+	movq	%rdx,%r15
+
+	movq	24(%rdi),%r11
+	movq	32(%rdi),%r13
+
+	movq	0(%rdi),%r14
+	movq	8(%rdi),%rbx
+	movq	16(%rdi),%r10
+
+	movq	%r13,%r12
+	shrq	$2,%r13
+	movq	%r12,%rax
+	addq	%r12,%r13
+	jmp	.Loop
+
+.align	32
+.Loop:
+
+	addq	0(%rsi),%r14
+	adcq	8(%rsi),%rbx
+	leaq	16(%rsi),%rsi
+	adcq	%rcx,%r10
+	mulq	%r14
+	movq	%rax,%r9
+	movq	%r11,%rax
+	movq	%rdx,%rdi
+
+	mulq	%r14
+	movq	%rax,%r14
+	movq	%r11,%rax
+	movq	%rdx,%r8
+
+	mulq	%rbx
+	addq	%rax,%r9
+	movq	%r13,%rax
+	adcq	%rdx,%rdi
+
+	mulq	%rbx
+	movq	%r10,%rbx
+	addq	%rax,%r14
+	adcq	%rdx,%r8
+
+	imulq	%r13,%rbx
+	addq	%rbx,%r9
+	movq	%r8,%rbx
+	adcq	$0,%rdi
+
+	imulq	%r11,%r10
+	addq	%r9,%rbx
+	movq	$-4,%rax
+	adcq	%r10,%rdi
+
+	andq	%rdi,%rax
+	movq	%rdi,%r10
+	shrq	$2,%rdi
+	andq	$3,%r10
+	addq	%rdi,%rax
+	addq	%rax,%r14
+	adcq	$0,%rbx
+	adcq	$0,%r10
+
+	movq	%r12,%rax
+	decq	%r15
+	jnz	.Loop
+
+	movq	0(%rsp),%rdi
+
+	movq	%r14,0(%rdi)
+	movq	%rbx,8(%rdi)
+	movq	%r10,16(%rdi)
+
+	movq	8(%rsp),%r15
+	movq	16(%rsp),%r14
+	movq	24(%rsp),%r13
+	movq	32(%rsp),%r12
+	movq	40(%rsp),%rbx
+	leaq	48(%rsp),%rsp
+.Lno_data:
+.Lblocks_epilogue:
+	ret
+ENDPROC(poly1305_blocks_x86_64)
+
+.align	32
+ENTRY(poly1305_emit_x86_64)
+.Lemit:
+	movq	0(%rdi),%r8
+	movq	8(%rdi),%r9
+	movq	16(%rdi),%r10
+
+	movq	%r8,%rax
+	addq	$5,%r8
+	movq	%r9,%rcx
+	adcq	$0,%r9
+	adcq	$0,%r10
+	shrq	$2,%r10
+	cmovnzq	%r8,%rax
+	cmovnzq	%r9,%rcx
+
+	addq	0(%rdx),%rax
+	adcq	8(%rdx),%rcx
+	movq	%rax,0(%rsi)
+	movq	%rcx,8(%rsi)
+
+	ret
+ENDPROC(poly1305_emit_x86_64)
+
+.macro __poly1305_block
+	mulq	%r14
+	movq	%rax,%r9
+	movq	%r11,%rax
+	movq	%rdx,%rdi
+
+	mulq	%r14
+	movq	%rax,%r14
+	movq	%r11,%rax
+	movq	%rdx,%r8
+
+	mulq	%rbx
+	addq	%rax,%r9
+	movq	%r13,%rax
+	adcq	%rdx,%rdi
+
+	mulq	%rbx
+	movq	%r10,%rbx
+	addq	%rax,%r14
+	adcq	%rdx,%r8
+
+	imulq	%r13,%rbx
+	addq	%rbx,%r9
+	movq	%r8,%rbx
+	adcq	$0,%rdi
+
+	imulq	%r11,%r10
+	addq	%r9,%rbx
+	movq	$-4,%rax
+	adcq	%r10,%rdi
+
+	andq	%rdi,%rax
+	movq	%rdi,%r10
+	shrq	$2,%rdi
+	andq	$3,%r10
+	addq	%rdi,%rax
+	addq	%rax,%r14
+	adcq	$0,%rbx
+	adcq	$0,%r10
+.endm
+
+.macro __poly1305_init_avx
+	movq	%r11,%r14
+	movq	%r12,%rbx
+	xorq	%r10,%r10
+
+	leaq	48+64(%rdi),%rdi
+
+	movq	%r12,%rax
+	movq	%rdi,0(%rsp)
+	__poly1305_block
+	movq	0(%rsp),%rdi
+
+	movl	$0x3ffffff,%eax
+	movl	$0x3ffffff,%edx
+	movq	%r14,%r8
+	andl	%r14d,%eax
+	movq	%r11,%r9
+	andl	%r11d,%edx
+	movl	%eax,-64(%rdi)
+	shrq	$26,%r8
+	movl	%edx,-60(%rdi)
+	shrq	$26,%r9
+
+	movl	$0x3ffffff,%eax
+	movl	$0x3ffffff,%edx
+	andl	%r8d,%eax
+	andl	%r9d,%edx
+	movl	%eax,-48(%rdi)
+	leal	(%rax,%rax,4),%eax
+	movl	%edx,-44(%rdi)
+	leal	(%rdx,%rdx,4),%edx
+	movl	%eax,-32(%rdi)
+	shrq	$26,%r8
+	movl	%edx,-28(%rdi)
+	shrq	$26,%r9
+
+	movq	%rbx,%rax
+	movq	%r12,%rdx
+	shlq	$12,%rax
+	shlq	$12,%rdx
+	orq	%r8,%rax
+	orq	%r9,%rdx
+	andl	$0x3ffffff,%eax
+	andl	$0x3ffffff,%edx
+	movl	%eax,-16(%rdi)
+	leal	(%rax,%rax,4),%eax
+	movl	%edx,-12(%rdi)
+	leal	(%rdx,%rdx,4),%edx
+	movl	%eax,0(%rdi)
+	movq	%rbx,%r8
+	movl	%edx,4(%rdi)
+	movq	%r12,%r9
+
+	movl	$0x3ffffff,%eax
+	movl	$0x3ffffff,%edx
+	shrq	$14,%r8
+	shrq	$14,%r9
+	andl	%r8d,%eax
+	andl	%r9d,%edx
+	movl	%eax,16(%rdi)
+	leal	(%rax,%rax,4),%eax
+	movl	%edx,20(%rdi)
+	leal	(%rdx,%rdx,4),%edx
+	movl	%eax,32(%rdi)
+	shrq	$26,%r8
+	movl	%edx,36(%rdi)
+	shrq	$26,%r9
+
+	movq	%r10,%rax
+	shlq	$24,%rax
+	orq	%rax,%r8
+	movl	%r8d,48(%rdi)
+	leaq	(%r8,%r8,4),%r8
+	movl	%r9d,52(%rdi)
+	leaq	(%r9,%r9,4),%r9
+	movl	%r8d,64(%rdi)
+	movl	%r9d,68(%rdi)
+
+	movq	%r12,%rax
+	movq	%rdi,0(%rsp)
+	__poly1305_block
+	movq	0(%rsp),%rdi
+
+	movl	$0x3ffffff,%eax
+	movq	%r14,%r8
+	andl	%r14d,%eax
+	shrq	$26,%r8
+	movl	%eax,-52(%rdi)
+
+	movl	$0x3ffffff,%edx
+	andl	%r8d,%edx
+	movl	%edx,-36(%rdi)
+	leal	(%rdx,%rdx,4),%edx
+	shrq	$26,%r8
+	movl	%edx,-20(%rdi)
+
+	movq	%rbx,%rax
+	shlq	$12,%rax
+	orq	%r8,%rax
+	andl	$0x3ffffff,%eax
+	movl	%eax,-4(%rdi)
+	leal	(%rax,%rax,4),%eax
+	movq	%rbx,%r8
+	movl	%eax,12(%rdi)
+
+	movl	$0x3ffffff,%edx
+	shrq	$14,%r8
+	andl	%r8d,%edx
+	movl	%edx,28(%rdi)
+	leal	(%rdx,%rdx,4),%edx
+	shrq	$26,%r8
+	movl	%edx,44(%rdi)
+
+	movq	%r10,%rax
+	shlq	$24,%rax
+	orq	%rax,%r8
+	movl	%r8d,60(%rdi)
+	leaq	(%r8,%r8,4),%r8
+	movl	%r8d,76(%rdi)
+
+	movq	%r12,%rax
+	movq	%rdi,0(%rsp)
+	__poly1305_block
+	movq	0(%rsp),%rdi
+
+	movl	$0x3ffffff,%eax
+	movq	%r14,%r8
+	andl	%r14d,%eax
+	shrq	$26,%r8
+	movl	%eax,-56(%rdi)
+
+	movl	$0x3ffffff,%edx
+	andl	%r8d,%edx
+	movl	%edx,-40(%rdi)
+	leal	(%rdx,%rdx,4),%edx
+	shrq	$26,%r8
+	movl	%edx,-24(%rdi)
+
+	movq	%rbx,%rax
+	shlq	$12,%rax
+	orq	%r8,%rax
+	andl	$0x3ffffff,%eax
+	movl	%eax,-8(%rdi)
+	leal	(%rax,%rax,4),%eax
+	movq	%rbx,%r8
+	movl	%eax,8(%rdi)
+
+	movl	$0x3ffffff,%edx
+	shrq	$14,%r8
+	andl	%r8d,%edx
+	movl	%edx,24(%rdi)
+	leal	(%rdx,%rdx,4),%edx
+	shrq	$26,%r8
+	movl	%edx,40(%rdi)
+
+	movq	%r10,%rax
+	shlq	$24,%rax
+	orq	%rax,%r8
+	movl	%r8d,56(%rdi)
+	leaq	(%r8,%r8,4),%r8
+	movl	%r8d,72(%rdi)
+
+	leaq	-48-64(%rdi),%rdi
+.endm
+
+#ifdef CONFIG_AS_AVX
+.align	32
+ENTRY(poly1305_blocks_avx)
+
+	movl	20(%rdi),%r8d
+	cmpq	$128,%rdx
+	jae	.Lblocks_avx
+	testl	%r8d,%r8d
+	jz	.Lblocks
+
+.Lblocks_avx:
+	andq	$-16,%rdx
+	jz	.Lno_data_avx
+
+	vzeroupper
+
+	testl	%r8d,%r8d
+	jz	.Lbase2_64_avx
+
+	testq	$31,%rdx
+	jz	.Leven_avx
+
+	pushq	%rbx
+	pushq	%r12
+	pushq	%r13
+	pushq	%r14
+	pushq	%r15
+	pushq	%rdi
+
+.Lblocks_avx_body:
+
+	movq	%rdx,%r15
+
+	movq	0(%rdi),%r8
+	movq	8(%rdi),%r9
+	movl	16(%rdi),%r10d
+
+	movq	24(%rdi),%r11
+	movq	32(%rdi),%r13
+
+
+	movl	%r8d,%r14d
+	andq	$-2147483648,%r8
+	movq	%r9,%r12
+	movl	%r9d,%ebx
+	andq	$-2147483648,%r9
+
+	shrq	$6,%r8
+	shlq	$52,%r12
+	addq	%r8,%r14
+	shrq	$12,%rbx
+	shrq	$18,%r9
+	addq	%r12,%r14
+	adcq	%r9,%rbx
+
+	movq	%r10,%r8
+	shlq	$40,%r8
+	shrq	$24,%r10
+	addq	%r8,%rbx
+	adcq	$0,%r10
+
+	movq	$-4,%r9
+	movq	%r10,%r8
+	andq	%r10,%r9
+	shrq	$2,%r8
+	andq	$3,%r10
+	addq	%r9,%r8
+	addq	%r8,%r14
+	adcq	$0,%rbx
+	adcq	$0,%r10
+
+	movq	%r13,%r12
+	movq	%r13,%rax
+	shrq	$2,%r13
+	addq	%r12,%r13
+
+	addq	0(%rsi),%r14
+	adcq	8(%rsi),%rbx
+	leaq	16(%rsi),%rsi
+	adcq	%rcx,%r10
+
+	movq	%rdi,0(%rsp)
+	__poly1305_block
+	movq	0(%rsp),%rdi
+
+	testq	%rcx,%rcx
+	jz	.Lstore_base2_64_avx
+
+
+	movq	%r14,%rax
+	movq	%r14,%rdx
+	shrq	$52,%r14
+	movq	%rbx,%r11
+	movq	%rbx,%r12
+	shrq	$26,%rdx
+	andq	$0x3ffffff,%rax
+	shlq	$12,%r11
+	andq	$0x3ffffff,%rdx
+	shrq	$14,%rbx
+	orq	%r11,%r14
+	shlq	$24,%r10
+	andq	$0x3ffffff,%r14
+	shrq	$40,%r12
+	andq	$0x3ffffff,%rbx
+	orq	%r12,%r10
+
+	subq	$16,%r15
+	jz	.Lstore_base2_26_avx
+
+	vmovd	%eax,%xmm0
+	vmovd	%edx,%xmm1
+	vmovd	%r14d,%xmm2
+	vmovd	%ebx,%xmm3
+	vmovd	%r10d,%xmm4
+	jmp	.Lproceed_avx
+
+.align	32
+.Lstore_base2_64_avx:
+	movq	%r14,0(%rdi)
+	movq	%rbx,8(%rdi)
+	movq	%r10,16(%rdi)
+	jmp	.Ldone_avx
+
+.align	16
+.Lstore_base2_26_avx:
+	movl	%eax,0(%rdi)
+	movl	%edx,4(%rdi)
+	movl	%r14d,8(%rdi)
+	movl	%ebx,12(%rdi)
+	movl	%r10d,16(%rdi)
+.align	16
+.Ldone_avx:
+	movq	8(%rsp),%r15
+	movq	16(%rsp),%r14
+	movq	24(%rsp),%r13
+	movq	32(%rsp),%r12
+	movq	40(%rsp),%rbx
+	leaq	48(%rsp),%rsp
+
+.Lno_data_avx:
+.Lblocks_avx_epilogue:
+	ret
+
+.align	32
+.Lbase2_64_avx:
+
+	pushq	%rbx
+	pushq	%r12
+	pushq	%r13
+	pushq	%r14
+	pushq	%r15
+	pushq	%rdi
+
+.Lbase2_64_avx_body:
+
+	movq	%rdx,%r15
+
+	movq	24(%rdi),%r11
+	movq	32(%rdi),%r13
+
+	movq	0(%rdi),%r14
+	movq	8(%rdi),%rbx
+	movl	16(%rdi),%r10d
+
+	movq	%r13,%r12
+	movq	%r13,%rax
+	shrq	$2,%r13
+	addq	%r12,%r13
+
+	testq	$31,%rdx
+	jz	.Linit_avx
+
+	addq	0(%rsi),%r14
+	adcq	8(%rsi),%rbx
+	leaq	16(%rsi),%rsi
+	adcq	%rcx,%r10
+	subq	$16,%r15
+
+	movq	%rdi,0(%rsp)
+	__poly1305_block
+	movq	0(%rsp),%rdi
+
+.Linit_avx:
+
+	movq	%r14,%rax
+	movq	%r14,%rdx
+	shrq	$52,%r14
+	movq	%rbx,%r8
+	movq	%rbx,%r9
+	shrq	$26,%rdx
+	andq	$0x3ffffff,%rax
+	shlq	$12,%r8
+	andq	$0x3ffffff,%rdx
+	shrq	$14,%rbx
+	orq	%r8,%r14
+	shlq	$24,%r10
+	andq	$0x3ffffff,%r14
+	shrq	$40,%r9
+	andq	$0x3ffffff,%rbx
+	orq	%r9,%r10
+
+	vmovd	%eax,%xmm0
+	vmovd	%edx,%xmm1
+	vmovd	%r14d,%xmm2
+	vmovd	%ebx,%xmm3
+	vmovd	%r10d,%xmm4
+	movl	$1,20(%rdi)
+
+	__poly1305_init_avx
+
+.Lproceed_avx:
+	movq	%r15,%rdx
+
+	movq	8(%rsp),%r15
+	movq	16(%rsp),%r14
+	movq	24(%rsp),%r13
+	movq	32(%rsp),%r12
+	movq	40(%rsp),%rbx
+	leaq	48(%rsp),%rax
+	leaq	48(%rsp),%rsp
+
+.Lbase2_64_avx_epilogue:
+	jmp	.Ldo_avx
+
+
+.align	32
+.Leven_avx:
+	vmovd	0(%rdi),%xmm0
+	vmovd	4(%rdi),%xmm1
+	vmovd	8(%rdi),%xmm2
+	vmovd	12(%rdi),%xmm3
+	vmovd	16(%rdi),%xmm4
+
+.Ldo_avx:
+	leaq	8(%rsp),%r10
+	andq	$-32,%rsp
+	subq	$8,%rsp
+	leaq	-88(%rsp),%r11
+	subq	$0x178,%rsp
+	subq	$64,%rdx
+	leaq	-32(%rsi),%rax
+	cmovcq	%rax,%rsi
+
+	vmovdqu	48(%rdi),%xmm14
+	leaq	112(%rdi),%rdi
+	leaq	.Lconst(%rip),%rcx
+
+	vmovdqu	32(%rsi),%xmm5
+	vmovdqu	48(%rsi),%xmm6
+	vmovdqa	64(%rcx),%xmm15
+
+	vpsrldq	$6,%xmm5,%xmm7
+	vpsrldq	$6,%xmm6,%xmm8
+	vpunpckhqdq	%xmm6,%xmm5,%xmm9
+	vpunpcklqdq	%xmm6,%xmm5,%xmm5
+	vpunpcklqdq	%xmm8,%xmm7,%xmm8
+
+	vpsrlq	$40,%xmm9,%xmm9
+	vpsrlq	$26,%xmm5,%xmm6
+	vpand	%xmm15,%xmm5,%xmm5
+	vpsrlq	$4,%xmm8,%xmm7
+	vpand	%xmm15,%xmm6,%xmm6
+	vpsrlq	$30,%xmm8,%xmm8
+	vpand	%xmm15,%xmm7,%xmm7
+	vpand	%xmm15,%xmm8,%xmm8
+	vpor	32(%rcx),%xmm9,%xmm9
+
+	jbe	.Lskip_loop_avx
+
+
+	vmovdqu	-48(%rdi),%xmm11
+	vmovdqu	-32(%rdi),%xmm12
+	vpshufd	$0xEE,%xmm14,%xmm13
+	vpshufd	$0x44,%xmm14,%xmm10
+	vmovdqa	%xmm13,-144(%r11)
+	vmovdqa	%xmm10,0(%rsp)
+	vpshufd	$0xEE,%xmm11,%xmm14
+	vmovdqu	-16(%rdi),%xmm10
+	vpshufd	$0x44,%xmm11,%xmm11
+	vmovdqa	%xmm14,-128(%r11)
+	vmovdqa	%xmm11,16(%rsp)
+	vpshufd	$0xEE,%xmm12,%xmm13
+	vmovdqu	0(%rdi),%xmm11
+	vpshufd	$0x44,%xmm12,%xmm12
+	vmovdqa	%xmm13,-112(%r11)
+	vmovdqa	%xmm12,32(%rsp)
+	vpshufd	$0xEE,%xmm10,%xmm14
+	vmovdqu	16(%rdi),%xmm12
+	vpshufd	$0x44,%xmm10,%xmm10
+	vmovdqa	%xmm14,-96(%r11)
+	vmovdqa	%xmm10,48(%rsp)
+	vpshufd	$0xEE,%xmm11,%xmm13
+	vmovdqu	32(%rdi),%xmm10
+	vpshufd	$0x44,%xmm11,%xmm11
+	vmovdqa	%xmm13,-80(%r11)
+	vmovdqa	%xmm11,64(%rsp)
+	vpshufd	$0xEE,%xmm12,%xmm14
+	vmovdqu	48(%rdi),%xmm11
+	vpshufd	$0x44,%xmm12,%xmm12
+	vmovdqa	%xmm14,-64(%r11)
+	vmovdqa	%xmm12,80(%rsp)
+	vpshufd	$0xEE,%xmm10,%xmm13
+	vmovdqu	64(%rdi),%xmm12
+	vpshufd	$0x44,%xmm10,%xmm10
+	vmovdqa	%xmm13,-48(%r11)
+	vmovdqa	%xmm10,96(%rsp)
+	vpshufd	$0xEE,%xmm11,%xmm14
+	vpshufd	$0x44,%xmm11,%xmm11
+	vmovdqa	%xmm14,-32(%r11)
+	vmovdqa	%xmm11,112(%rsp)
+	vpshufd	$0xEE,%xmm12,%xmm13
+	vmovdqa	0(%rsp),%xmm14
+	vpshufd	$0x44,%xmm12,%xmm12
+	vmovdqa	%xmm13,-16(%r11)
+	vmovdqa	%xmm12,128(%rsp)
+
+	jmp	.Loop_avx
+
+.align	32
+.Loop_avx:
+
+	vpmuludq	%xmm5,%xmm14,%xmm10
+	vpmuludq	%xmm6,%xmm14,%xmm11
+	vmovdqa	%xmm2,32(%r11)
+	vpmuludq	%xmm7,%xmm14,%xmm12
+	vmovdqa	16(%rsp),%xmm2
+	vpmuludq	%xmm8,%xmm14,%xmm13
+	vpmuludq	%xmm9,%xmm14,%xmm14
+
+	vmovdqa	%xmm0,0(%r11)
+	vpmuludq	32(%rsp),%xmm9,%xmm0
+	vmovdqa	%xmm1,16(%r11)
+	vpmuludq	%xmm8,%xmm2,%xmm1
+	vpaddq	%xmm0,%xmm10,%xmm10
+	vpaddq	%xmm1,%xmm14,%xmm14
+	vmovdqa	%xmm3,48(%r11)
+	vpmuludq	%xmm7,%xmm2,%xmm0
+	vpmuludq	%xmm6,%xmm2,%xmm1
+	vpaddq	%xmm0,%xmm13,%xmm13
+	vmovdqa	48(%rsp),%xmm3
+	vpaddq	%xmm1,%xmm12,%xmm12
+	vmovdqa	%xmm4,64(%r11)
+	vpmuludq	%xmm5,%xmm2,%xmm2
+	vpmuludq	%xmm7,%xmm3,%xmm0
+	vpaddq	%xmm2,%xmm11,%xmm11
+
+	vmovdqa	64(%rsp),%xmm4
+	vpaddq	%xmm0,%xmm14,%xmm14
+	vpmuludq	%xmm6,%xmm3,%xmm1
+	vpmuludq	%xmm5,%xmm3,%xmm3
+	vpaddq	%xmm1,%xmm13,%xmm13
+	vmovdqa	80(%rsp),%xmm2
+	vpaddq	%xmm3,%xmm12,%xmm12
+	vpmuludq	%xmm9,%xmm4,%xmm0
+	vpmuludq	%xmm8,%xmm4,%xmm4
+	vpaddq	%xmm0,%xmm11,%xmm11
+	vmovdqa	96(%rsp),%xmm3
+	vpaddq	%xmm4,%xmm10,%xmm10
+
+	vmovdqa	128(%rsp),%xmm4
+	vpmuludq	%xmm6,%xmm2,%xmm1
+	vpmuludq	%xmm5,%xmm2,%xmm2
+	vpaddq	%xmm1,%xmm14,%xmm14
+	vpaddq	%xmm2,%xmm13,%xmm13
+	vpmuludq	%xmm9,%xmm3,%xmm0
+	vpmuludq	%xmm8,%xmm3,%xmm1
+	vpaddq	%xmm0,%xmm12,%xmm12
+	vmovdqu	0(%rsi),%xmm0
+	vpaddq	%xmm1,%xmm11,%xmm11
+	vpmuludq	%xmm7,%xmm3,%xmm3
+	vpmuludq	%xmm7,%xmm4,%xmm7
+	vpaddq	%xmm3,%xmm10,%xmm10
+
+	vmovdqu	16(%rsi),%xmm1
+	vpaddq	%xmm7,%xmm11,%xmm11
+	vpmuludq	%xmm8,%xmm4,%xmm8
+	vpmuludq	%xmm9,%xmm4,%xmm9
+	vpsrldq	$6,%xmm0,%xmm2
+	vpaddq	%xmm8,%xmm12,%xmm12
+	vpaddq	%xmm9,%xmm13,%xmm13
+	vpsrldq	$6,%xmm1,%xmm3
+	vpmuludq	112(%rsp),%xmm5,%xmm9
+	vpmuludq	%xmm6,%xmm4,%xmm5
+	vpunpckhqdq	%xmm1,%xmm0,%xmm4
+	vpaddq	%xmm9,%xmm14,%xmm14
+	vmovdqa	-144(%r11),%xmm9
+	vpaddq	%xmm5,%xmm10,%xmm10
+
+	vpunpcklqdq	%xmm1,%xmm0,%xmm0
+	vpunpcklqdq	%xmm3,%xmm2,%xmm3
+
+
+	vpsrldq	$5,%xmm4,%xmm4
+	vpsrlq	$26,%xmm0,%xmm1
+	vpand	%xmm15,%xmm0,%xmm0
+	vpsrlq	$4,%xmm3,%xmm2
+	vpand	%xmm15,%xmm1,%xmm1
+	vpand	0(%rcx),%xmm4,%xmm4
+	vpsrlq	$30,%xmm3,%xmm3
+	vpand	%xmm15,%xmm2,%xmm2
+	vpand	%xmm15,%xmm3,%xmm3
+	vpor	32(%rcx),%xmm4,%xmm4
+
+	vpaddq	0(%r11),%xmm0,%xmm0
+	vpaddq	16(%r11),%xmm1,%xmm1
+	vpaddq	32(%r11),%xmm2,%xmm2
+	vpaddq	48(%r11),%xmm3,%xmm3
+	vpaddq	64(%r11),%xmm4,%xmm4
+
+	leaq	32(%rsi),%rax
+	leaq	64(%rsi),%rsi
+	subq	$64,%rdx
+	cmovcq	%rax,%rsi
+
+	vpmuludq	%xmm0,%xmm9,%xmm5
+	vpmuludq	%xmm1,%xmm9,%xmm6
+	vpaddq	%xmm5,%xmm10,%xmm10
+	vpaddq	%xmm6,%xmm11,%xmm11
+	vmovdqa	-128(%r11),%xmm7
+	vpmuludq	%xmm2,%xmm9,%xmm5
+	vpmuludq	%xmm3,%xmm9,%xmm6
+	vpaddq	%xmm5,%xmm12,%xmm12
+	vpaddq	%xmm6,%xmm13,%xmm13
+	vpmuludq	%xmm4,%xmm9,%xmm9
+	vpmuludq	-112(%r11),%xmm4,%xmm5
+	vpaddq	%xmm9,%xmm14,%xmm14
+
+	vpaddq	%xmm5,%xmm10,%xmm10
+	vpmuludq	%xmm2,%xmm7,%xmm6
+	vpmuludq	%xmm3,%xmm7,%xmm5
+	vpaddq	%xmm6,%xmm13,%xmm13
+	vmovdqa	-96(%r11),%xmm8
+	vpaddq	%xmm5,%xmm14,%xmm14
+	vpmuludq	%xmm1,%xmm7,%xmm6
+	vpmuludq	%xmm0,%xmm7,%xmm7
+	vpaddq	%xmm6,%xmm12,%xmm12
+	vpaddq	%xmm7,%xmm11,%xmm11
+
+	vmovdqa	-80(%r11),%xmm9
+	vpmuludq	%xmm2,%xmm8,%xmm5
+	vpmuludq	%xmm1,%xmm8,%xmm6
+	vpaddq	%xmm5,%xmm14,%xmm14
+	vpaddq	%xmm6,%xmm13,%xmm13
+	vmovdqa	-64(%r11),%xmm7
+	vpmuludq	%xmm0,%xmm8,%xmm8
+	vpmuludq	%xmm4,%xmm9,%xmm5
+	vpaddq	%xmm8,%xmm12,%xmm12
+	vpaddq	%xmm5,%xmm11,%xmm11
+	vmovdqa	-48(%r11),%xmm8
+	vpmuludq	%xmm3,%xmm9,%xmm9
+	vpmuludq	%xmm1,%xmm7,%xmm6
+	vpaddq	%xmm9,%xmm10,%xmm10
+
+	vmovdqa	-16(%r11),%xmm9
+	vpaddq	%xmm6,%xmm14,%xmm14
+	vpmuludq	%xmm0,%xmm7,%xmm7
+	vpmuludq	%xmm4,%xmm8,%xmm5
+	vpaddq	%xmm7,%xmm13,%xmm13
+	vpaddq	%xmm5,%xmm12,%xmm12
+	vmovdqu	32(%rsi),%xmm5
+	vpmuludq	%xmm3,%xmm8,%xmm7
+	vpmuludq	%xmm2,%xmm8,%xmm8
+	vpaddq	%xmm7,%xmm11,%xmm11
+	vmovdqu	48(%rsi),%xmm6
+	vpaddq	%xmm8,%xmm10,%xmm10
+
+	vpmuludq	%xmm2,%xmm9,%xmm2
+	vpmuludq	%xmm3,%xmm9,%xmm3
+	vpsrldq	$6,%xmm5,%xmm7
+	vpaddq	%xmm2,%xmm11,%xmm11
+	vpmuludq	%xmm4,%xmm9,%xmm4
+	vpsrldq	$6,%xmm6,%xmm8
+	vpaddq	%xmm3,%xmm12,%xmm2
+	vpaddq	%xmm4,%xmm13,%xmm3
+	vpmuludq	-32(%r11),%xmm0,%xmm4
+	vpmuludq	%xmm1,%xmm9,%xmm0
+	vpunpckhqdq	%xmm6,%xmm5,%xmm9
+	vpaddq	%xmm4,%xmm14,%xmm4
+	vpaddq	%xmm0,%xmm10,%xmm0
+
+	vpunpcklqdq	%xmm6,%xmm5,%xmm5
+	vpunpcklqdq	%xmm8,%xmm7,%xmm8
+
+
+	vpsrldq	$5,%xmm9,%xmm9
+	vpsrlq	$26,%xmm5,%xmm6
+	vmovdqa	0(%rsp),%xmm14
+	vpand	%xmm15,%xmm5,%xmm5
+	vpsrlq	$4,%xmm8,%xmm7
+	vpand	%xmm15,%xmm6,%xmm6
+	vpand	0(%rcx),%xmm9,%xmm9
+	vpsrlq	$30,%xmm8,%xmm8
+	vpand	%xmm15,%xmm7,%xmm7
+	vpand	%xmm15,%xmm8,%xmm8
+	vpor	32(%rcx),%xmm9,%xmm9
+
+	vpsrlq	$26,%xmm3,%xmm13
+	vpand	%xmm15,%xmm3,%xmm3
+	vpaddq	%xmm13,%xmm4,%xmm4
+
+	vpsrlq	$26,%xmm0,%xmm10
+	vpand	%xmm15,%xmm0,%xmm0
+	vpaddq	%xmm10,%xmm11,%xmm1
+
+	vpsrlq	$26,%xmm4,%xmm10
+	vpand	%xmm15,%xmm4,%xmm4
+
+	vpsrlq	$26,%xmm1,%xmm11
+	vpand	%xmm15,%xmm1,%xmm1
+	vpaddq	%xmm11,%xmm2,%xmm2
+
+	vpaddq	%xmm10,%xmm0,%xmm0
+	vpsllq	$2,%xmm10,%xmm10
+	vpaddq	%xmm10,%xmm0,%xmm0
+
+	vpsrlq	$26,%xmm2,%xmm12
+	vpand	%xmm15,%xmm2,%xmm2
+	vpaddq	%xmm12,%xmm3,%xmm3
+
+	vpsrlq	$26,%xmm0,%xmm10
+	vpand	%xmm15,%xmm0,%xmm0
+	vpaddq	%xmm10,%xmm1,%xmm1
+
+	vpsrlq	$26,%xmm3,%xmm13
+	vpand	%xmm15,%xmm3,%xmm3
+	vpaddq	%xmm13,%xmm4,%xmm4
+
+	ja	.Loop_avx
+
+.Lskip_loop_avx:
+	vpshufd	$0x10,%xmm14,%xmm14
+	addq	$32,%rdx
+	jnz	.Long_tail_avx
+
+	vpaddq	%xmm2,%xmm7,%xmm7
+	vpaddq	%xmm0,%xmm5,%xmm5
+	vpaddq	%xmm1,%xmm6,%xmm6
+	vpaddq	%xmm3,%xmm8,%xmm8
+	vpaddq	%xmm4,%xmm9,%xmm9
+
+.Long_tail_avx:
+	vmovdqa	%xmm2,32(%r11)
+	vmovdqa	%xmm0,0(%r11)
+	vmovdqa	%xmm1,16(%r11)
+	vmovdqa	%xmm3,48(%r11)
+	vmovdqa	%xmm4,64(%r11)
+
+	vpmuludq	%xmm7,%xmm14,%xmm12
+	vpmuludq	%xmm5,%xmm14,%xmm10
+	vpshufd	$0x10,-48(%rdi),%xmm2
+	vpmuludq	%xmm6,%xmm14,%xmm11
+	vpmuludq	%xmm8,%xmm14,%xmm13
+	vpmuludq	%xmm9,%xmm14,%xmm14
+
+	vpmuludq	%xmm8,%xmm2,%xmm0
+	vpaddq	%xmm0,%xmm14,%xmm14
+	vpshufd	$0x10,-32(%rdi),%xmm3
+	vpmuludq	%xmm7,%xmm2,%xmm1
+	vpaddq	%xmm1,%xmm13,%xmm13
+	vpshufd	$0x10,-16(%rdi),%xmm4
+	vpmuludq	%xmm6,%xmm2,%xmm0
+	vpaddq	%xmm0,%xmm12,%xmm12
+	vpmuludq	%xmm5,%xmm2,%xmm2
+	vpaddq	%xmm2,%xmm11,%xmm11
+	vpmuludq	%xmm9,%xmm3,%xmm3
+	vpaddq	%xmm3,%xmm10,%xmm10
+
+	vpshufd	$0x10,0(%rdi),%xmm2
+	vpmuludq	%xmm7,%xmm4,%xmm1
+	vpaddq	%xmm1,%xmm14,%xmm14
+	vpmuludq	%xmm6,%xmm4,%xmm0
+	vpaddq	%xmm0,%xmm13,%xmm13
+	vpshufd	$0x10,16(%rdi),%xmm3
+	vpmuludq	%xmm5,%xmm4,%xmm4
+	vpaddq	%xmm4,%xmm12,%xmm12
+	vpmuludq	%xmm9,%xmm2,%xmm1
+	vpaddq	%xmm1,%xmm11,%xmm11
+	vpshufd	$0x10,32(%rdi),%xmm4
+	vpmuludq	%xmm8,%xmm2,%xmm2
+	vpaddq	%xmm2,%xmm10,%xmm10
+
+	vpmuludq	%xmm6,%xmm3,%xmm0
+	vpaddq	%xmm0,%xmm14,%xmm14
+	vpmuludq	%xmm5,%xmm3,%xmm3
+	vpaddq	%xmm3,%xmm13,%xmm13
+	vpshufd	$0x10,48(%rdi),%xmm2
+	vpmuludq	%xmm9,%xmm4,%xmm1
+	vpaddq	%xmm1,%xmm12,%xmm12
+	vpshufd	$0x10,64(%rdi),%xmm3
+	vpmuludq	%xmm8,%xmm4,%xmm0
+	vpaddq	%xmm0,%xmm11,%xmm11
+	vpmuludq	%xmm7,%xmm4,%xmm4
+	vpaddq	%xmm4,%xmm10,%xmm10
+
+	vpmuludq	%xmm5,%xmm2,%xmm2
+	vpaddq	%xmm2,%xmm14,%xmm14
+	vpmuludq	%xmm9,%xmm3,%xmm1
+	vpaddq	%xmm1,%xmm13,%xmm13
+	vpmuludq	%xmm8,%xmm3,%xmm0
+	vpaddq	%xmm0,%xmm12,%xmm12
+	vpmuludq	%xmm7,%xmm3,%xmm1
+	vpaddq	%xmm1,%xmm11,%xmm11
+	vpmuludq	%xmm6,%xmm3,%xmm3
+	vpaddq	%xmm3,%xmm10,%xmm10
+
+	jz	.Lshort_tail_avx
+
+	vmovdqu	0(%rsi),%xmm0
+	vmovdqu	16(%rsi),%xmm1
+
+	vpsrldq	$6,%xmm0,%xmm2
+	vpsrldq	$6,%xmm1,%xmm3
+	vpunpckhqdq	%xmm1,%xmm0,%xmm4
+	vpunpcklqdq	%xmm1,%xmm0,%xmm0
+	vpunpcklqdq	%xmm3,%xmm2,%xmm3
+
+	vpsrlq	$40,%xmm4,%xmm4
+	vpsrlq	$26,%xmm0,%xmm1
+	vpand	%xmm15,%xmm0,%xmm0
+	vpsrlq	$4,%xmm3,%xmm2
+	vpand	%xmm15,%xmm1,%xmm1
+	vpsrlq	$30,%xmm3,%xmm3
+	vpand	%xmm15,%xmm2,%xmm2
+	vpand	%xmm15,%xmm3,%xmm3
+	vpor	32(%rcx),%xmm4,%xmm4
+
+	vpshufd	$0x32,-64(%rdi),%xmm9
+	vpaddq	0(%r11),%xmm0,%xmm0
+	vpaddq	16(%r11),%xmm1,%xmm1
+	vpaddq	32(%r11),%xmm2,%xmm2
+	vpaddq	48(%r11),%xmm3,%xmm3
+	vpaddq	64(%r11),%xmm4,%xmm4
+
+	vpmuludq	%xmm0,%xmm9,%xmm5
+	vpaddq	%xmm5,%xmm10,%xmm10
+	vpmuludq	%xmm1,%xmm9,%xmm6
+	vpaddq	%xmm6,%xmm11,%xmm11
+	vpmuludq	%xmm2,%xmm9,%xmm5
+	vpaddq	%xmm5,%xmm12,%xmm12
+	vpshufd	$0x32,-48(%rdi),%xmm7
+	vpmuludq	%xmm3,%xmm9,%xmm6
+	vpaddq	%xmm6,%xmm13,%xmm13
+	vpmuludq	%xmm4,%xmm9,%xmm9
+	vpaddq	%xmm9,%xmm14,%xmm14
+
+	vpmuludq	%xmm3,%xmm7,%xmm5
+	vpaddq	%xmm5,%xmm14,%xmm14
+	vpshufd	$0x32,-32(%rdi),%xmm8
+	vpmuludq	%xmm2,%xmm7,%xmm6
+	vpaddq	%xmm6,%xmm13,%xmm13
+	vpshufd	$0x32,-16(%rdi),%xmm9
+	vpmuludq	%xmm1,%xmm7,%xmm5
+	vpaddq	%xmm5,%xmm12,%xmm12
+	vpmuludq	%xmm0,%xmm7,%xmm7
+	vpaddq	%xmm7,%xmm11,%xmm11
+	vpmuludq	%xmm4,%xmm8,%xmm8
+	vpaddq	%xmm8,%xmm10,%xmm10
+
+	vpshufd	$0x32,0(%rdi),%xmm7
+	vpmuludq	%xmm2,%xmm9,%xmm6
+	vpaddq	%xmm6,%xmm14,%xmm14
+	vpmuludq	%xmm1,%xmm9,%xmm5
+	vpaddq	%xmm5,%xmm13,%xmm13
+	vpshufd	$0x32,16(%rdi),%xmm8
+	vpmuludq	%xmm0,%xmm9,%xmm9
+	vpaddq	%xmm9,%xmm12,%xmm12
+	vpmuludq	%xmm4,%xmm7,%xmm6
+	vpaddq	%xmm6,%xmm11,%xmm11
+	vpshufd	$0x32,32(%rdi),%xmm9
+	vpmuludq	%xmm3,%xmm7,%xmm7
+	vpaddq	%xmm7,%xmm10,%xmm10
+
+	vpmuludq	%xmm1,%xmm8,%xmm5
+	vpaddq	%xmm5,%xmm14,%xmm14
+	vpmuludq	%xmm0,%xmm8,%xmm8
+	vpaddq	%xmm8,%xmm13,%xmm13
+	vpshufd	$0x32,48(%rdi),%xmm7
+	vpmuludq	%xmm4,%xmm9,%xmm6
+	vpaddq	%xmm6,%xmm12,%xmm12
+	vpshufd	$0x32,64(%rdi),%xmm8
+	vpmuludq	%xmm3,%xmm9,%xmm5
+	vpaddq	%xmm5,%xmm11,%xmm11
+	vpmuludq	%xmm2,%xmm9,%xmm9
+	vpaddq	%xmm9,%xmm10,%xmm10
+
+	vpmuludq	%xmm0,%xmm7,%xmm7
+	vpaddq	%xmm7,%xmm14,%xmm14
+	vpmuludq	%xmm4,%xmm8,%xmm6
+	vpaddq	%xmm6,%xmm13,%xmm13
+	vpmuludq	%xmm3,%xmm8,%xmm5
+	vpaddq	%xmm5,%xmm12,%xmm12
+	vpmuludq	%xmm2,%xmm8,%xmm6
+	vpaddq	%xmm6,%xmm11,%xmm11
+	vpmuludq	%xmm1,%xmm8,%xmm8
+	vpaddq	%xmm8,%xmm10,%xmm10
+
+.Lshort_tail_avx:
+
+	vpsrldq	$8,%xmm14,%xmm9
+	vpsrldq	$8,%xmm13,%xmm8
+	vpsrldq	$8,%xmm11,%xmm6
+	vpsrldq	$8,%xmm10,%xmm5
+	vpsrldq	$8,%xmm12,%xmm7
+	vpaddq	%xmm8,%xmm13,%xmm13
+	vpaddq	%xmm9,%xmm14,%xmm14
+	vpaddq	%xmm5,%xmm10,%xmm10
+	vpaddq	%xmm6,%xmm11,%xmm11
+	vpaddq	%xmm7,%xmm12,%xmm12
+
+	vpsrlq	$26,%xmm13,%xmm3
+	vpand	%xmm15,%xmm13,%xmm13
+	vpaddq	%xmm3,%xmm14,%xmm14
+
+	vpsrlq	$26,%xmm10,%xmm0
+	vpand	%xmm15,%xmm10,%xmm10
+	vpaddq	%xmm0,%xmm11,%xmm11
+
+	vpsrlq	$26,%xmm14,%xmm4
+	vpand	%xmm15,%xmm14,%xmm14
+
+	vpsrlq	$26,%xmm11,%xmm1
+	vpand	%xmm15,%xmm11,%xmm11
+	vpaddq	%xmm1,%xmm12,%xmm12
+
+	vpaddq	%xmm4,%xmm10,%xmm10
+	vpsllq	$2,%xmm4,%xmm4
+	vpaddq	%xmm4,%xmm10,%xmm10
+
+	vpsrlq	$26,%xmm12,%xmm2
+	vpand	%xmm15,%xmm12,%xmm12
+	vpaddq	%xmm2,%xmm13,%xmm13
+
+	vpsrlq	$26,%xmm10,%xmm0
+	vpand	%xmm15,%xmm10,%xmm10
+	vpaddq	%xmm0,%xmm11,%xmm11
+
+	vpsrlq	$26,%xmm13,%xmm3
+	vpand	%xmm15,%xmm13,%xmm13
+	vpaddq	%xmm3,%xmm14,%xmm14
+
+	vmovd	%xmm10,-112(%rdi)
+	vmovd	%xmm11,-108(%rdi)
+	vmovd	%xmm12,-104(%rdi)
+	vmovd	%xmm13,-100(%rdi)
+	vmovd	%xmm14,-96(%rdi)
+	leaq	-8(%r10),%rsp
+
+	vzeroupper
+	ret
+ENDPROC(poly1305_blocks_avx)
+
+.align	32
+ENTRY(poly1305_emit_avx)
+	cmpl	$0,20(%rdi)
+	je	.Lemit
+
+	movl	0(%rdi),%eax
+	movl	4(%rdi),%ecx
+	movl	8(%rdi),%r8d
+	movl	12(%rdi),%r11d
+	movl	16(%rdi),%r10d
+
+	shlq	$26,%rcx
+	movq	%r8,%r9
+	shlq	$52,%r8
+	addq	%rcx,%rax
+	shrq	$12,%r9
+	addq	%rax,%r8
+	adcq	$0,%r9
+
+	shlq	$14,%r11
+	movq	%r10,%rax
+	shrq	$24,%r10
+	addq	%r11,%r9
+	shlq	$40,%rax
+	addq	%rax,%r9
+	adcq	$0,%r10
+
+	movq	%r10,%rax
+	movq	%r10,%rcx
+	andq	$3,%r10
+	shrq	$2,%rax
+	andq	$-4,%rcx
+	addq	%rcx,%rax
+	addq	%rax,%r8
+	adcq	$0,%r9
+	adcq	$0,%r10
+
+	movq	%r8,%rax
+	addq	$5,%r8
+	movq	%r9,%rcx
+	adcq	$0,%r9
+	adcq	$0,%r10
+	shrq	$2,%r10
+	cmovnzq	%r8,%rax
+	cmovnzq	%r9,%rcx
+
+	addq	0(%rdx),%rax
+	adcq	8(%rdx),%rcx
+	movq	%rax,0(%rsi)
+	movq	%rcx,8(%rsi)
+
+	ret
+ENDPROC(poly1305_emit_avx)
+#endif /* CONFIG_AS_AVX */
+
+#ifdef CONFIG_AS_AVX2
+.align	32
+ENTRY(poly1305_blocks_avx2)
+
+	movl	20(%rdi),%r8d
+	cmpq	$128,%rdx
+	jae	.Lblocks_avx2
+	testl	%r8d,%r8d
+	jz	.Lblocks
+
+.Lblocks_avx2:
+	andq	$-16,%rdx
+	jz	.Lno_data_avx2
+
+	vzeroupper
+
+	testl	%r8d,%r8d
+	jz	.Lbase2_64_avx2
+
+	testq	$63,%rdx
+	jz	.Leven_avx2
+
+	pushq	%rbx
+	pushq	%r12
+	pushq	%r13
+	pushq	%r14
+	pushq	%r15
+	pushq	%rdi
+
+.Lblocks_avx2_body:
+
+	movq	%rdx,%r15
+
+	movq	0(%rdi),%r8
+	movq	8(%rdi),%r9
+	movl	16(%rdi),%r10d
+
+	movq	24(%rdi),%r11
+	movq	32(%rdi),%r13
+
+
+	movl	%r8d,%r14d
+	andq	$-2147483648,%r8
+	movq	%r9,%r12
+	movl	%r9d,%ebx
+	andq	$-2147483648,%r9
+
+	shrq	$6,%r8
+	shlq	$52,%r12
+	addq	%r8,%r14
+	shrq	$12,%rbx
+	shrq	$18,%r9
+	addq	%r12,%r14
+	adcq	%r9,%rbx
+
+	movq	%r10,%r8
+	shlq	$40,%r8
+	shrq	$24,%r10
+	addq	%r8,%rbx
+	adcq	$0,%r10
+
+	movq	$-4,%r9
+	movq	%r10,%r8
+	andq	%r10,%r9
+	shrq	$2,%r8
+	andq	$3,%r10
+	addq	%r9,%r8
+	addq	%r8,%r14
+	adcq	$0,%rbx
+	adcq	$0,%r10
+
+	movq	%r13,%r12
+	movq	%r13,%rax
+	shrq	$2,%r13
+	addq	%r12,%r13
+
+.Lbase2_26_pre_avx2:
+	addq	0(%rsi),%r14
+	adcq	8(%rsi),%rbx
+	leaq	16(%rsi),%rsi
+	adcq	%rcx,%r10
+	subq	$16,%r15
+
+	movq	%rdi,0(%rsp)
+	__poly1305_block
+	movq	0(%rsp),%rdi
+	movq	%r12,%rax
+
+	testq	$63,%r15
+	jnz	.Lbase2_26_pre_avx2
+
+	testq	%rcx,%rcx
+	jz	.Lstore_base2_64_avx2
+
+
+	movq	%r14,%rax
+	movq	%r14,%rdx
+	shrq	$52,%r14
+	movq	%rbx,%r11
+	movq	%rbx,%r12
+	shrq	$26,%rdx
+	andq	$0x3ffffff,%rax
+	shlq	$12,%r11
+	andq	$0x3ffffff,%rdx
+	shrq	$14,%rbx
+	orq	%r11,%r14
+	shlq	$24,%r10
+	andq	$0x3ffffff,%r14
+	shrq	$40,%r12
+	andq	$0x3ffffff,%rbx
+	orq	%r12,%r10
+
+	testq	%r15,%r15
+	jz	.Lstore_base2_26_avx2
+
+	vmovd	%eax,%xmm0
+	vmovd	%edx,%xmm1
+	vmovd	%r14d,%xmm2
+	vmovd	%ebx,%xmm3
+	vmovd	%r10d,%xmm4
+	jmp	.Lproceed_avx2
+
+.align	32
+.Lstore_base2_64_avx2:
+	movq	%r14,0(%rdi)
+	movq	%rbx,8(%rdi)
+	movq	%r10,16(%rdi)
+	jmp	.Ldone_avx2
+
+.align	16
+.Lstore_base2_26_avx2:
+	movl	%eax,0(%rdi)
+	movl	%edx,4(%rdi)
+	movl	%r14d,8(%rdi)
+	movl	%ebx,12(%rdi)
+	movl	%r10d,16(%rdi)
+.align	16
+.Ldone_avx2:
+	movq	8(%rsp),%r15
+	movq	16(%rsp),%r14
+	movq	24(%rsp),%r13
+	movq	32(%rsp),%r12
+	movq	40(%rsp),%rbx
+	leaq	48(%rsp),%rsp
+
+.Lno_data_avx2:
+.Lblocks_avx2_epilogue:
+	ret
+
+
+.align	32
+.Lbase2_64_avx2:
+
+
+	pushq	%rbx
+	pushq	%r12
+	pushq	%r13
+	pushq	%r14
+	pushq	%r15
+	pushq	%rdi
+
+.Lbase2_64_avx2_body:
+
+	movq	%rdx,%r15
+
+	movq	24(%rdi),%r11
+	movq	32(%rdi),%r13
+
+	movq	0(%rdi),%r14
+	movq	8(%rdi),%rbx
+	movl	16(%rdi),%r10d
+
+	movq	%r13,%r12
+	movq	%r13,%rax
+	shrq	$2,%r13
+	addq	%r12,%r13
+
+	testq	$63,%rdx
+	jz	.Linit_avx2
+
+.Lbase2_64_pre_avx2:
+	addq	0(%rsi),%r14
+	adcq	8(%rsi),%rbx
+	leaq	16(%rsi),%rsi
+	adcq	%rcx,%r10
+	subq	$16,%r15
+
+	movq	%rdi,0(%rsp)
+	__poly1305_block
+	movq	0(%rsp),%rdi
+	movq	%r12,%rax
+
+	testq	$63,%r15
+	jnz	.Lbase2_64_pre_avx2
+
+.Linit_avx2:
+
+	movq	%r14,%rax
+	movq	%r14,%rdx
+	shrq	$52,%r14
+	movq	%rbx,%r8
+	movq	%rbx,%r9
+	shrq	$26,%rdx
+	andq	$0x3ffffff,%rax
+	shlq	$12,%r8
+	andq	$0x3ffffff,%rdx
+	shrq	$14,%rbx
+	orq	%r8,%r14
+	shlq	$24,%r10
+	andq	$0x3ffffff,%r14
+	shrq	$40,%r9
+	andq	$0x3ffffff,%rbx
+	orq	%r9,%r10
+
+	vmovd	%eax,%xmm0
+	vmovd	%edx,%xmm1
+	vmovd	%r14d,%xmm2
+	vmovd	%ebx,%xmm3
+	vmovd	%r10d,%xmm4
+	movl	$1,20(%rdi)
+
+	__poly1305_init_avx
+
+.Lproceed_avx2:
+	movq	%r15,%rdx
+
+	movq	8(%rsp),%r15
+	movq	16(%rsp),%r14
+	movq	24(%rsp),%r13
+	movq	32(%rsp),%r12
+	movq	40(%rsp),%rbx
+	leaq	48(%rsp),%rax
+	leaq	48(%rsp),%rsp
+
+.Lbase2_64_avx2_epilogue:
+	jmp	.Ldo_avx2
+
+
+.align	32
+.Leven_avx2:
+
+	vmovd	0(%rdi),%xmm0
+	vmovd	4(%rdi),%xmm1
+	vmovd	8(%rdi),%xmm2
+	vmovd	12(%rdi),%xmm3
+	vmovd	16(%rdi),%xmm4
+
+.Ldo_avx2:
+	leaq	8(%rsp),%r10
+	subq	$0x128,%rsp
+	leaq	.Lconst(%rip),%rcx
+	leaq	48+64(%rdi),%rdi
+	vmovdqa	96(%rcx),%ymm7
+
+
+	vmovdqu	-64(%rdi),%xmm9
+	andq	$-512,%rsp
+	vmovdqu	-48(%rdi),%xmm10
+	vmovdqu	-32(%rdi),%xmm6
+	vmovdqu	-16(%rdi),%xmm11
+	vmovdqu	0(%rdi),%xmm12
+	vmovdqu	16(%rdi),%xmm13
+	leaq	144(%rsp),%rax
+	vmovdqu	32(%rdi),%xmm14
+	vpermd	%ymm9,%ymm7,%ymm9
+	vmovdqu	48(%rdi),%xmm15
+	vpermd	%ymm10,%ymm7,%ymm10
+	vmovdqu	64(%rdi),%xmm5
+	vpermd	%ymm6,%ymm7,%ymm6
+	vmovdqa	%ymm9,0(%rsp)
+	vpermd	%ymm11,%ymm7,%ymm11
+	vmovdqa	%ymm10,32-144(%rax)
+	vpermd	%ymm12,%ymm7,%ymm12
+	vmovdqa	%ymm6,64-144(%rax)
+	vpermd	%ymm13,%ymm7,%ymm13
+	vmovdqa	%ymm11,96-144(%rax)
+	vpermd	%ymm14,%ymm7,%ymm14
+	vmovdqa	%ymm12,128-144(%rax)
+	vpermd	%ymm15,%ymm7,%ymm15
+	vmovdqa	%ymm13,160-144(%rax)
+	vpermd	%ymm5,%ymm7,%ymm5
+	vmovdqa	%ymm14,192-144(%rax)
+	vmovdqa	%ymm15,224-144(%rax)
+	vmovdqa	%ymm5,256-144(%rax)
+	vmovdqa	64(%rcx),%ymm5
+
+
+
+	vmovdqu	0(%rsi),%xmm7
+	vmovdqu	16(%rsi),%xmm8
+	vinserti128	$1,32(%rsi),%ymm7,%ymm7
+	vinserti128	$1,48(%rsi),%ymm8,%ymm8
+	leaq	64(%rsi),%rsi
+
+	vpsrldq	$6,%ymm7,%ymm9
+	vpsrldq	$6,%ymm8,%ymm10
+	vpunpckhqdq	%ymm8,%ymm7,%ymm6
+	vpunpcklqdq	%ymm10,%ymm9,%ymm9
+	vpunpcklqdq	%ymm8,%ymm7,%ymm7
+
+	vpsrlq	$30,%ymm9,%ymm10
+	vpsrlq	$4,%ymm9,%ymm9
+	vpsrlq	$26,%ymm7,%ymm8
+	vpsrlq	$40,%ymm6,%ymm6
+	vpand	%ymm5,%ymm9,%ymm9
+	vpand	%ymm5,%ymm7,%ymm7
+	vpand	%ymm5,%ymm8,%ymm8
+	vpand	%ymm5,%ymm10,%ymm10
+	vpor	32(%rcx),%ymm6,%ymm6
+
+	vpaddq	%ymm2,%ymm9,%ymm2
+	subq	$64,%rdx
+	jz	.Ltail_avx2
+	jmp	.Loop_avx2
+
+.align	32
+.Loop_avx2:
+
+	vpaddq	%ymm0,%ymm7,%ymm0
+	vmovdqa	0(%rsp),%ymm7
+	vpaddq	%ymm1,%ymm8,%ymm1
+	vmovdqa	32(%rsp),%ymm8
+	vpaddq	%ymm3,%ymm10,%ymm3
+	vmovdqa	96(%rsp),%ymm9
+	vpaddq	%ymm4,%ymm6,%ymm4
+	vmovdqa	48(%rax),%ymm10
+	vmovdqa	112(%rax),%ymm5
+
+	vpmuludq	%ymm2,%ymm7,%ymm13
+	vpmuludq	%ymm2,%ymm8,%ymm14
+	vpmuludq	%ymm2,%ymm9,%ymm15
+	vpmuludq	%ymm2,%ymm10,%ymm11
+	vpmuludq	%ymm2,%ymm5,%ymm12
+
+	vpmuludq	%ymm0,%ymm8,%ymm6
+	vpmuludq	%ymm1,%ymm8,%ymm2
+	vpaddq	%ymm6,%ymm12,%ymm12
+	vpaddq	%ymm2,%ymm13,%ymm13
+	vpmuludq	%ymm3,%ymm8,%ymm6
+	vpmuludq	64(%rsp),%ymm4,%ymm2
+	vpaddq	%ymm6,%ymm15,%ymm15
+	vpaddq	%ymm2,%ymm11,%ymm11
+	vmovdqa	-16(%rax),%ymm8
+
+	vpmuludq	%ymm0,%ymm7,%ymm6
+	vpmuludq	%ymm1,%ymm7,%ymm2
+	vpaddq	%ymm6,%ymm11,%ymm11
+	vpaddq	%ymm2,%ymm12,%ymm12
+	vpmuludq	%ymm3,%ymm7,%ymm6
+	vpmuludq	%ymm4,%ymm7,%ymm2
+	vmovdqu	0(%rsi),%xmm7
+	vpaddq	%ymm6,%ymm14,%ymm14
+	vpaddq	%ymm2,%ymm15,%ymm15
+	vinserti128	$1,32(%rsi),%ymm7,%ymm7
+
+	vpmuludq	%ymm3,%ymm8,%ymm6
+	vpmuludq	%ymm4,%ymm8,%ymm2
+	vmovdqu	16(%rsi),%xmm8
+	vpaddq	%ymm6,%ymm11,%ymm11
+	vpaddq	%ymm2,%ymm12,%ymm12
+	vmovdqa	16(%rax),%ymm2
+	vpmuludq	%ymm1,%ymm9,%ymm6
+	vpmuludq	%ymm0,%ymm9,%ymm9
+	vpaddq	%ymm6,%ymm14,%ymm14
+	vpaddq	%ymm9,%ymm13,%ymm13
+	vinserti128	$1,48(%rsi),%ymm8,%ymm8
+	leaq	64(%rsi),%rsi
+
+	vpmuludq	%ymm1,%ymm2,%ymm6
+	vpmuludq	%ymm0,%ymm2,%ymm2
+	vpsrldq	$6,%ymm7,%ymm9
+	vpaddq	%ymm6,%ymm15,%ymm15
+	vpaddq	%ymm2,%ymm14,%ymm14
+	vpmuludq	%ymm3,%ymm10,%ymm6
+	vpmuludq	%ymm4,%ymm10,%ymm2
+	vpsrldq	$6,%ymm8,%ymm10
+	vpaddq	%ymm6,%ymm12,%ymm12
+	vpaddq	%ymm2,%ymm13,%ymm13
+	vpunpckhqdq	%ymm8,%ymm7,%ymm6
+
+	vpmuludq	%ymm3,%ymm5,%ymm3
+	vpmuludq	%ymm4,%ymm5,%ymm4
+	vpunpcklqdq	%ymm8,%ymm7,%ymm7
+	vpaddq	%ymm3,%ymm13,%ymm2
+	vpaddq	%ymm4,%ymm14,%ymm3
+	vpunpcklqdq	%ymm10,%ymm9,%ymm10
+	vpmuludq	80(%rax),%ymm0,%ymm4
+	vpmuludq	%ymm1,%ymm5,%ymm0
+	vmovdqa	64(%rcx),%ymm5
+	vpaddq	%ymm4,%ymm15,%ymm4
+	vpaddq	%ymm0,%ymm11,%ymm0
+
+	vpsrlq	$26,%ymm3,%ymm14
+	vpand	%ymm5,%ymm3,%ymm3
+	vpaddq	%ymm14,%ymm4,%ymm4
+
+	vpsrlq	$26,%ymm0,%ymm11
+	vpand	%ymm5,%ymm0,%ymm0
+	vpaddq	%ymm11,%ymm12,%ymm1
+
+	vpsrlq	$26,%ymm4,%ymm15
+	vpand	%ymm5,%ymm4,%ymm4
+
+	vpsrlq	$4,%ymm10,%ymm9
+
+	vpsrlq	$26,%ymm1,%ymm12
+	vpand	%ymm5,%ymm1,%ymm1
+	vpaddq	%ymm12,%ymm2,%ymm2
+
+	vpaddq	%ymm15,%ymm0,%ymm0
+	vpsllq	$2,%ymm15,%ymm15
+	vpaddq	%ymm15,%ymm0,%ymm0
+
+	vpand	%ymm5,%ymm9,%ymm9
+	vpsrlq	$26,%ymm7,%ymm8
+
+	vpsrlq	$26,%ymm2,%ymm13
+	vpand	%ymm5,%ymm2,%ymm2
+	vpaddq	%ymm13,%ymm3,%ymm3
+
+	vpaddq	%ymm9,%ymm2,%ymm2
+	vpsrlq	$30,%ymm10,%ymm10
+
+	vpsrlq	$26,%ymm0,%ymm11
+	vpand	%ymm5,%ymm0,%ymm0
+	vpaddq	%ymm11,%ymm1,%ymm1
+
+	vpsrlq	$40,%ymm6,%ymm6
+
+	vpsrlq	$26,%ymm3,%ymm14
+	vpand	%ymm5,%ymm3,%ymm3
+	vpaddq	%ymm14,%ymm4,%ymm4
+
+	vpand	%ymm5,%ymm7,%ymm7
+	vpand	%ymm5,%ymm8,%ymm8
+	vpand	%ymm5,%ymm10,%ymm10
+	vpor	32(%rcx),%ymm6,%ymm6
+
+	subq	$64,%rdx
+	jnz	.Loop_avx2
+
+.byte	0x66,0x90
+.Ltail_avx2:
+
+	vpaddq	%ymm0,%ymm7,%ymm0
+	vmovdqu	4(%rsp),%ymm7
+	vpaddq	%ymm1,%ymm8,%ymm1
+	vmovdqu	36(%rsp),%ymm8
+	vpaddq	%ymm3,%ymm10,%ymm3
+	vmovdqu	100(%rsp),%ymm9
+	vpaddq	%ymm4,%ymm6,%ymm4
+	vmovdqu	52(%rax),%ymm10
+	vmovdqu	116(%rax),%ymm5
+
+	vpmuludq	%ymm2,%ymm7,%ymm13
+	vpmuludq	%ymm2,%ymm8,%ymm14
+	vpmuludq	%ymm2,%ymm9,%ymm15
+	vpmuludq	%ymm2,%ymm10,%ymm11
+	vpmuludq	%ymm2,%ymm5,%ymm12
+
+	vpmuludq	%ymm0,%ymm8,%ymm6
+	vpmuludq	%ymm1,%ymm8,%ymm2
+	vpaddq	%ymm6,%ymm12,%ymm12
+	vpaddq	%ymm2,%ymm13,%ymm13
+	vpmuludq	%ymm3,%ymm8,%ymm6
+	vpmuludq	68(%rsp),%ymm4,%ymm2
+	vpaddq	%ymm6,%ymm15,%ymm15
+	vpaddq	%ymm2,%ymm11,%ymm11
+
+	vpmuludq	%ymm0,%ymm7,%ymm6
+	vpmuludq	%ymm1,%ymm7,%ymm2
+	vpaddq	%ymm6,%ymm11,%ymm11
+	vmovdqu	-12(%rax),%ymm8
+	vpaddq	%ymm2,%ymm12,%ymm12
+	vpmuludq	%ymm3,%ymm7,%ymm6
+	vpmuludq	%ymm4,%ymm7,%ymm2
+	vpaddq	%ymm6,%ymm14,%ymm14
+	vpaddq	%ymm2,%ymm15,%ymm15
+
+	vpmuludq	%ymm3,%ymm8,%ymm6
+	vpmuludq	%ymm4,%ymm8,%ymm2
+	vpaddq	%ymm6,%ymm11,%ymm11
+	vpaddq	%ymm2,%ymm12,%ymm12
+	vmovdqu	20(%rax),%ymm2
+	vpmuludq	%ymm1,%ymm9,%ymm6
+	vpmuludq	%ymm0,%ymm9,%ymm9
+	vpaddq	%ymm6,%ymm14,%ymm14
+	vpaddq	%ymm9,%ymm13,%ymm13
+
+	vpmuludq	%ymm1,%ymm2,%ymm6
+	vpmuludq	%ymm0,%ymm2,%ymm2
+	vpaddq	%ymm6,%ymm15,%ymm15
+	vpaddq	%ymm2,%ymm14,%ymm14
+	vpmuludq	%ymm3,%ymm10,%ymm6
+	vpmuludq	%ymm4,%ymm10,%ymm2
+	vpaddq	%ymm6,%ymm12,%ymm12
+	vpaddq	%ymm2,%ymm13,%ymm13
+
+	vpmuludq	%ymm3,%ymm5,%ymm3
+	vpmuludq	%ymm4,%ymm5,%ymm4
+	vpaddq	%ymm3,%ymm13,%ymm2
+	vpaddq	%ymm4,%ymm14,%ymm3
+	vpmuludq	84(%rax),%ymm0,%ymm4
+	vpmuludq	%ymm1,%ymm5,%ymm0
+	vmovdqa	64(%rcx),%ymm5
+	vpaddq	%ymm4,%ymm15,%ymm4
+	vpaddq	%ymm0,%ymm11,%ymm0
+
+	vpsrldq	$8,%ymm12,%ymm8
+	vpsrldq	$8,%ymm2,%ymm9
+	vpsrldq	$8,%ymm3,%ymm10
+	vpsrldq	$8,%ymm4,%ymm6
+	vpsrldq	$8,%ymm0,%ymm7
+	vpaddq	%ymm8,%ymm12,%ymm12
+	vpaddq	%ymm9,%ymm2,%ymm2
+	vpaddq	%ymm10,%ymm3,%ymm3
+	vpaddq	%ymm6,%ymm4,%ymm4
+	vpaddq	%ymm7,%ymm0,%ymm0
+
+	vpermq	$0x2,%ymm3,%ymm10
+	vpermq	$0x2,%ymm4,%ymm6
+	vpermq	$0x2,%ymm0,%ymm7
+	vpermq	$0x2,%ymm12,%ymm8
+	vpermq	$0x2,%ymm2,%ymm9
+	vpaddq	%ymm10,%ymm3,%ymm3
+	vpaddq	%ymm6,%ymm4,%ymm4
+	vpaddq	%ymm7,%ymm0,%ymm0
+	vpaddq	%ymm8,%ymm12,%ymm12
+	vpaddq	%ymm9,%ymm2,%ymm2
+
+	vpsrlq	$26,%ymm3,%ymm14
+	vpand	%ymm5,%ymm3,%ymm3
+	vpaddq	%ymm14,%ymm4,%ymm4
+
+	vpsrlq	$26,%ymm0,%ymm11
+	vpand	%ymm5,%ymm0,%ymm0
+	vpaddq	%ymm11,%ymm12,%ymm1
+
+	vpsrlq	$26,%ymm4,%ymm15
+	vpand	%ymm5,%ymm4,%ymm4
+
+	vpsrlq	$26,%ymm1,%ymm12
+	vpand	%ymm5,%ymm1,%ymm1
+	vpaddq	%ymm12,%ymm2,%ymm2
+
+	vpaddq	%ymm15,%ymm0,%ymm0
+	vpsllq	$2,%ymm15,%ymm15
+	vpaddq	%ymm15,%ymm0,%ymm0
+
+	vpsrlq	$26,%ymm2,%ymm13
+	vpand	%ymm5,%ymm2,%ymm2
+	vpaddq	%ymm13,%ymm3,%ymm3
+
+	vpsrlq	$26,%ymm0,%ymm11
+	vpand	%ymm5,%ymm0,%ymm0
+	vpaddq	%ymm11,%ymm1,%ymm1
+
+	vpsrlq	$26,%ymm3,%ymm14
+	vpand	%ymm5,%ymm3,%ymm3
+	vpaddq	%ymm14,%ymm4,%ymm4
+
+	vmovd	%xmm0,-112(%rdi)
+	vmovd	%xmm1,-108(%rdi)
+	vmovd	%xmm2,-104(%rdi)
+	vmovd	%xmm3,-100(%rdi)
+	vmovd	%xmm4,-96(%rdi)
+	leaq	-8(%r10),%rsp
+
+	vzeroupper
+	ret
+
+ENDPROC(poly1305_blocks_avx2)
+#endif /* CONFIG_AS_AVX2 */
+
+#ifdef CONFIG_AS_AVX512
+.align	32
+ENTRY(poly1305_blocks_avx512)
+
+	movl	20(%rdi),%r8d
+	cmpq	$128,%rdx
+	jae	.Lblocks_avx2_512
+	testl	%r8d,%r8d
+	jz	.Lblocks
+
+.Lblocks_avx2_512:
+	andq	$-16,%rdx
+	jz	.Lno_data_avx2_512
+
+	vzeroupper
+
+	testl	%r8d,%r8d
+	jz	.Lbase2_64_avx2_512
+
+	testq	$63,%rdx
+	jz	.Leven_avx2_512
+
+	pushq	%rbx
+	pushq	%r12
+	pushq	%r13
+	pushq	%r14
+	pushq	%r15
+	pushq	%rdi
+
+.Lblocks_avx2_body_512:
+
+	movq	%rdx,%r15
+
+	movq	0(%rdi),%r8
+	movq	8(%rdi),%r9
+	movl	16(%rdi),%r10d
+
+	movq	24(%rdi),%r11
+	movq	32(%rdi),%r13
+
+
+	movl	%r8d,%r14d
+	andq	$-2147483648,%r8
+	movq	%r9,%r12
+	movl	%r9d,%ebx
+	andq	$-2147483648,%r9
+
+	shrq	$6,%r8
+	shlq	$52,%r12
+	addq	%r8,%r14
+	shrq	$12,%rbx
+	shrq	$18,%r9
+	addq	%r12,%r14
+	adcq	%r9,%rbx
+
+	movq	%r10,%r8
+	shlq	$40,%r8
+	shrq	$24,%r10
+	addq	%r8,%rbx
+	adcq	$0,%r10
+
+	movq	$-4,%r9
+	movq	%r10,%r8
+	andq	%r10,%r9
+	shrq	$2,%r8
+	andq	$3,%r10
+	addq	%r9,%r8
+	addq	%r8,%r14
+	adcq	$0,%rbx
+	adcq	$0,%r10
+
+	movq	%r13,%r12
+	movq	%r13,%rax
+	shrq	$2,%r13
+	addq	%r12,%r13
+
+.Lbase2_26_pre_avx2_512:
+	addq	0(%rsi),%r14
+	adcq	8(%rsi),%rbx
+	leaq	16(%rsi),%rsi
+	adcq	%rcx,%r10
+	subq	$16,%r15
+
+	movq	%rdi,0(%rsp)
+	__poly1305_block
+	movq	0(%rsp),%rdi
+	movq	%r12,%rax
+
+	testq	$63,%r15
+	jnz	.Lbase2_26_pre_avx2_512
+
+	testq	%rcx,%rcx
+	jz	.Lstore_base2_64_avx2_512
+
+
+	movq	%r14,%rax
+	movq	%r14,%rdx
+	shrq	$52,%r14
+	movq	%rbx,%r11
+	movq	%rbx,%r12
+	shrq	$26,%rdx
+	andq	$0x3ffffff,%rax
+	shlq	$12,%r11
+	andq	$0x3ffffff,%rdx
+	shrq	$14,%rbx
+	orq	%r11,%r14
+	shlq	$24,%r10
+	andq	$0x3ffffff,%r14
+	shrq	$40,%r12
+	andq	$0x3ffffff,%rbx
+	orq	%r12,%r10
+
+	testq	%r15,%r15
+	jz	.Lstore_base2_26_avx2_512
+
+	vmovd	%eax,%xmm0
+	vmovd	%edx,%xmm1
+	vmovd	%r14d,%xmm2
+	vmovd	%ebx,%xmm3
+	vmovd	%r10d,%xmm4
+	jmp	.Lproceed_avx2_512
+
+.align	32
+.Lstore_base2_64_avx2_512:
+	movq	%r14,0(%rdi)
+	movq	%rbx,8(%rdi)
+	movq	%r10,16(%rdi)
+	jmp	.Ldone_avx2_512
+
+.align	16
+.Lstore_base2_26_avx2_512:
+	movl	%eax,0(%rdi)
+	movl	%edx,4(%rdi)
+	movl	%r14d,8(%rdi)
+	movl	%ebx,12(%rdi)
+	movl	%r10d,16(%rdi)
+.align	16
+.Ldone_avx2_512:
+	movq	8(%rsp),%r15
+	movq	16(%rsp),%r14
+	movq	24(%rsp),%r13
+	movq	32(%rsp),%r12
+	movq	40(%rsp),%rbx
+	leaq	48(%rsp),%rsp
+
+.Lno_data_avx2_512:
+.Lblocks_avx2_epilogue_512:
+	ret
+
+
+.align	32
+.Lbase2_64_avx2_512:
+
+	pushq	%rbx
+	pushq	%r12
+	pushq	%r13
+	pushq	%r14
+	pushq	%r15
+	pushq	%rdi
+
+.Lbase2_64_avx2_body_512:
+
+	movq	%rdx,%r15
+
+	movq	24(%rdi),%r11
+	movq	32(%rdi),%r13
+
+	movq	0(%rdi),%r14
+	movq	8(%rdi),%rbx
+	movl	16(%rdi),%r10d
+
+	movq	%r13,%r12
+	movq	%r13,%rax
+	shrq	$2,%r13
+	addq	%r12,%r13
+
+	testq	$63,%rdx
+	jz	.Linit_avx2_512
+
+.Lbase2_64_pre_avx2_512:
+	addq	0(%rsi),%r14
+	adcq	8(%rsi),%rbx
+	leaq	16(%rsi),%rsi
+	adcq	%rcx,%r10
+	subq	$16,%r15
+
+	movq	%rdi,0(%rsp)
+	__poly1305_block
+	movq	0(%rsp),%rdi
+	movq	%r12,%rax
+
+	testq	$63,%r15
+	jnz	.Lbase2_64_pre_avx2_512
+
+.Linit_avx2_512:
+
+	movq	%r14,%rax
+	movq	%r14,%rdx
+	shrq	$52,%r14
+	movq	%rbx,%r8
+	movq	%rbx,%r9
+	shrq	$26,%rdx
+	andq	$0x3ffffff,%rax
+	shlq	$12,%r8
+	andq	$0x3ffffff,%rdx
+	shrq	$14,%rbx
+	orq	%r8,%r14
+	shlq	$24,%r10
+	andq	$0x3ffffff,%r14
+	shrq	$40,%r9
+	andq	$0x3ffffff,%rbx
+	orq	%r9,%r10
+
+	vmovd	%eax,%xmm0
+	vmovd	%edx,%xmm1
+	vmovd	%r14d,%xmm2
+	vmovd	%ebx,%xmm3
+	vmovd	%r10d,%xmm4
+	movl	$1,20(%rdi)
+
+	__poly1305_init_avx
+
+.Lproceed_avx2_512:
+	movq	%r15,%rdx
+
+	movq	8(%rsp),%r15
+	movq	16(%rsp),%r14
+	movq	24(%rsp),%r13
+	movq	32(%rsp),%r12
+	movq	40(%rsp),%rbx
+	leaq	48(%rsp),%rax
+	leaq	48(%rsp),%rsp
+
+.Lbase2_64_avx2_epilogue_512:
+	jmp	.Ldo_avx2_512
+
+
+.align	32
+.Leven_avx2_512:
+
+	vmovd	0(%rdi),%xmm0
+	vmovd	4(%rdi),%xmm1
+	vmovd	8(%rdi),%xmm2
+	vmovd	12(%rdi),%xmm3
+	vmovd	16(%rdi),%xmm4
+
+.Ldo_avx2_512:
+	cmpq	$512,%rdx
+	jae	.Lblocks_avx512
+.Lskip_avx512:
+	leaq	8(%rsp),%r10
+
+	subq	$0x128,%rsp
+	leaq	.Lconst(%rip),%rcx
+	leaq	48+64(%rdi),%rdi
+	vmovdqa	96(%rcx),%ymm7
+
+
+	vmovdqu	-64(%rdi),%xmm9
+	andq	$-512,%rsp
+	vmovdqu	-48(%rdi),%xmm10
+	vmovdqu	-32(%rdi),%xmm6
+	vmovdqu	-16(%rdi),%xmm11
+	vmovdqu	0(%rdi),%xmm12
+	vmovdqu	16(%rdi),%xmm13
+	leaq	144(%rsp),%rax
+	vmovdqu	32(%rdi),%xmm14
+	vpermd	%ymm9,%ymm7,%ymm9
+	vmovdqu	48(%rdi),%xmm15
+	vpermd	%ymm10,%ymm7,%ymm10
+	vmovdqu	64(%rdi),%xmm5
+	vpermd	%ymm6,%ymm7,%ymm6
+	vmovdqa	%ymm9,0(%rsp)
+	vpermd	%ymm11,%ymm7,%ymm11
+	vmovdqa	%ymm10,32-144(%rax)
+	vpermd	%ymm12,%ymm7,%ymm12
+	vmovdqa	%ymm6,64-144(%rax)
+	vpermd	%ymm13,%ymm7,%ymm13
+	vmovdqa	%ymm11,96-144(%rax)
+	vpermd	%ymm14,%ymm7,%ymm14
+	vmovdqa	%ymm12,128-144(%rax)
+	vpermd	%ymm15,%ymm7,%ymm15
+	vmovdqa	%ymm13,160-144(%rax)
+	vpermd	%ymm5,%ymm7,%ymm5
+	vmovdqa	%ymm14,192-144(%rax)
+	vmovdqa	%ymm15,224-144(%rax)
+	vmovdqa	%ymm5,256-144(%rax)
+	vmovdqa	64(%rcx),%ymm5
+
+
+
+	vmovdqu	0(%rsi),%xmm7
+	vmovdqu	16(%rsi),%xmm8
+	vinserti128	$1,32(%rsi),%ymm7,%ymm7
+	vinserti128	$1,48(%rsi),%ymm8,%ymm8
+	leaq	64(%rsi),%rsi
+
+	vpsrldq	$6,%ymm7,%ymm9
+	vpsrldq	$6,%ymm8,%ymm10
+	vpunpckhqdq	%ymm8,%ymm7,%ymm6
+	vpunpcklqdq	%ymm10,%ymm9,%ymm9
+	vpunpcklqdq	%ymm8,%ymm7,%ymm7
+
+	vpsrlq	$30,%ymm9,%ymm10
+	vpsrlq	$4,%ymm9,%ymm9
+	vpsrlq	$26,%ymm7,%ymm8
+	vpsrlq	$40,%ymm6,%ymm6
+	vpand	%ymm5,%ymm9,%ymm9
+	vpand	%ymm5,%ymm7,%ymm7
+	vpand	%ymm5,%ymm8,%ymm8
+	vpand	%ymm5,%ymm10,%ymm10
+	vpor	32(%rcx),%ymm6,%ymm6
+
+	vpaddq	%ymm2,%ymm9,%ymm2
+	subq	$64,%rdx
+	jz	.Ltail_avx2_512
+	jmp	.Loop_avx2_512
+
+.align	32
+.Loop_avx2_512:
+
+	vpaddq	%ymm0,%ymm7,%ymm0
+	vmovdqa	0(%rsp),%ymm7
+	vpaddq	%ymm1,%ymm8,%ymm1
+	vmovdqa	32(%rsp),%ymm8
+	vpaddq	%ymm3,%ymm10,%ymm3
+	vmovdqa	96(%rsp),%ymm9
+	vpaddq	%ymm4,%ymm6,%ymm4
+	vmovdqa	48(%rax),%ymm10
+	vmovdqa	112(%rax),%ymm5
+
+	vpmuludq	%ymm2,%ymm7,%ymm13
+	vpmuludq	%ymm2,%ymm8,%ymm14
+	vpmuludq	%ymm2,%ymm9,%ymm15
+	vpmuludq	%ymm2,%ymm10,%ymm11
+	vpmuludq	%ymm2,%ymm5,%ymm12
+
+	vpmuludq	%ymm0,%ymm8,%ymm6
+	vpmuludq	%ymm1,%ymm8,%ymm2
+	vpaddq	%ymm6,%ymm12,%ymm12
+	vpaddq	%ymm2,%ymm13,%ymm13
+	vpmuludq	%ymm3,%ymm8,%ymm6
+	vpmuludq	64(%rsp),%ymm4,%ymm2
+	vpaddq	%ymm6,%ymm15,%ymm15
+	vpaddq	%ymm2,%ymm11,%ymm11
+	vmovdqa	-16(%rax),%ymm8
+
+	vpmuludq	%ymm0,%ymm7,%ymm6
+	vpmuludq	%ymm1,%ymm7,%ymm2
+	vpaddq	%ymm6,%ymm11,%ymm11
+	vpaddq	%ymm2,%ymm12,%ymm12
+	vpmuludq	%ymm3,%ymm7,%ymm6
+	vpmuludq	%ymm4,%ymm7,%ymm2
+	vmovdqu	0(%rsi),%xmm7
+	vpaddq	%ymm6,%ymm14,%ymm14
+	vpaddq	%ymm2,%ymm15,%ymm15
+	vinserti128	$1,32(%rsi),%ymm7,%ymm7
+
+	vpmuludq	%ymm3,%ymm8,%ymm6
+	vpmuludq	%ymm4,%ymm8,%ymm2
+	vmovdqu	16(%rsi),%xmm8
+	vpaddq	%ymm6,%ymm11,%ymm11
+	vpaddq	%ymm2,%ymm12,%ymm12
+	vmovdqa	16(%rax),%ymm2
+	vpmuludq	%ymm1,%ymm9,%ymm6
+	vpmuludq	%ymm0,%ymm9,%ymm9
+	vpaddq	%ymm6,%ymm14,%ymm14
+	vpaddq	%ymm9,%ymm13,%ymm13
+	vinserti128	$1,48(%rsi),%ymm8,%ymm8
+	leaq	64(%rsi),%rsi
+
+	vpmuludq	%ymm1,%ymm2,%ymm6
+	vpmuludq	%ymm0,%ymm2,%ymm2
+	vpsrldq	$6,%ymm7,%ymm9
+	vpaddq	%ymm6,%ymm15,%ymm15
+	vpaddq	%ymm2,%ymm14,%ymm14
+	vpmuludq	%ymm3,%ymm10,%ymm6
+	vpmuludq	%ymm4,%ymm10,%ymm2
+	vpsrldq	$6,%ymm8,%ymm10
+	vpaddq	%ymm6,%ymm12,%ymm12
+	vpaddq	%ymm2,%ymm13,%ymm13
+	vpunpckhqdq	%ymm8,%ymm7,%ymm6
+
+	vpmuludq	%ymm3,%ymm5,%ymm3
+	vpmuludq	%ymm4,%ymm5,%ymm4
+	vpunpcklqdq	%ymm8,%ymm7,%ymm7
+	vpaddq	%ymm3,%ymm13,%ymm2
+	vpaddq	%ymm4,%ymm14,%ymm3
+	vpunpcklqdq	%ymm10,%ymm9,%ymm10
+	vpmuludq	80(%rax),%ymm0,%ymm4
+	vpmuludq	%ymm1,%ymm5,%ymm0
+	vmovdqa	64(%rcx),%ymm5
+	vpaddq	%ymm4,%ymm15,%ymm4
+	vpaddq	%ymm0,%ymm11,%ymm0
+
+	vpsrlq	$26,%ymm3,%ymm14
+	vpand	%ymm5,%ymm3,%ymm3
+	vpaddq	%ymm14,%ymm4,%ymm4
+
+	vpsrlq	$26,%ymm0,%ymm11
+	vpand	%ymm5,%ymm0,%ymm0
+	vpaddq	%ymm11,%ymm12,%ymm1
+
+	vpsrlq	$26,%ymm4,%ymm15
+	vpand	%ymm5,%ymm4,%ymm4
+
+	vpsrlq	$4,%ymm10,%ymm9
+
+	vpsrlq	$26,%ymm1,%ymm12
+	vpand	%ymm5,%ymm1,%ymm1
+	vpaddq	%ymm12,%ymm2,%ymm2
+
+	vpaddq	%ymm15,%ymm0,%ymm0
+	vpsllq	$2,%ymm15,%ymm15
+	vpaddq	%ymm15,%ymm0,%ymm0
+
+	vpand	%ymm5,%ymm9,%ymm9
+	vpsrlq	$26,%ymm7,%ymm8
+
+	vpsrlq	$26,%ymm2,%ymm13
+	vpand	%ymm5,%ymm2,%ymm2
+	vpaddq	%ymm13,%ymm3,%ymm3
+
+	vpaddq	%ymm9,%ymm2,%ymm2
+	vpsrlq	$30,%ymm10,%ymm10
+
+	vpsrlq	$26,%ymm0,%ymm11
+	vpand	%ymm5,%ymm0,%ymm0
+	vpaddq	%ymm11,%ymm1,%ymm1
+
+	vpsrlq	$40,%ymm6,%ymm6
+
+	vpsrlq	$26,%ymm3,%ymm14
+	vpand	%ymm5,%ymm3,%ymm3
+	vpaddq	%ymm14,%ymm4,%ymm4
+
+	vpand	%ymm5,%ymm7,%ymm7
+	vpand	%ymm5,%ymm8,%ymm8
+	vpand	%ymm5,%ymm10,%ymm10
+	vpor	32(%rcx),%ymm6,%ymm6
+
+	subq	$64,%rdx
+	jnz	.Loop_avx2_512
+
+.byte	0x66,0x90
+.Ltail_avx2_512:
+
+	vpaddq	%ymm0,%ymm7,%ymm0
+	vmovdqu	4(%rsp),%ymm7
+	vpaddq	%ymm1,%ymm8,%ymm1
+	vmovdqu	36(%rsp),%ymm8
+	vpaddq	%ymm3,%ymm10,%ymm3
+	vmovdqu	100(%rsp),%ymm9
+	vpaddq	%ymm4,%ymm6,%ymm4
+	vmovdqu	52(%rax),%ymm10
+	vmovdqu	116(%rax),%ymm5
+
+	vpmuludq	%ymm2,%ymm7,%ymm13
+	vpmuludq	%ymm2,%ymm8,%ymm14
+	vpmuludq	%ymm2,%ymm9,%ymm15
+	vpmuludq	%ymm2,%ymm10,%ymm11
+	vpmuludq	%ymm2,%ymm5,%ymm12
+
+	vpmuludq	%ymm0,%ymm8,%ymm6
+	vpmuludq	%ymm1,%ymm8,%ymm2
+	vpaddq	%ymm6,%ymm12,%ymm12
+	vpaddq	%ymm2,%ymm13,%ymm13
+	vpmuludq	%ymm3,%ymm8,%ymm6
+	vpmuludq	68(%rsp),%ymm4,%ymm2
+	vpaddq	%ymm6,%ymm15,%ymm15
+	vpaddq	%ymm2,%ymm11,%ymm11
+
+	vpmuludq	%ymm0,%ymm7,%ymm6
+	vpmuludq	%ymm1,%ymm7,%ymm2
+	vpaddq	%ymm6,%ymm11,%ymm11
+	vmovdqu	-12(%rax),%ymm8
+	vpaddq	%ymm2,%ymm12,%ymm12
+	vpmuludq	%ymm3,%ymm7,%ymm6
+	vpmuludq	%ymm4,%ymm7,%ymm2
+	vpaddq	%ymm6,%ymm14,%ymm14
+	vpaddq	%ymm2,%ymm15,%ymm15
+
+	vpmuludq	%ymm3,%ymm8,%ymm6
+	vpmuludq	%ymm4,%ymm8,%ymm2
+	vpaddq	%ymm6,%ymm11,%ymm11
+	vpaddq	%ymm2,%ymm12,%ymm12
+	vmovdqu	20(%rax),%ymm2
+	vpmuludq	%ymm1,%ymm9,%ymm6
+	vpmuludq	%ymm0,%ymm9,%ymm9
+	vpaddq	%ymm6,%ymm14,%ymm14
+	vpaddq	%ymm9,%ymm13,%ymm13
+
+	vpmuludq	%ymm1,%ymm2,%ymm6
+	vpmuludq	%ymm0,%ymm2,%ymm2
+	vpaddq	%ymm6,%ymm15,%ymm15
+	vpaddq	%ymm2,%ymm14,%ymm14
+	vpmuludq	%ymm3,%ymm10,%ymm6
+	vpmuludq	%ymm4,%ymm10,%ymm2
+	vpaddq	%ymm6,%ymm12,%ymm12
+	vpaddq	%ymm2,%ymm13,%ymm13
+
+	vpmuludq	%ymm3,%ymm5,%ymm3
+	vpmuludq	%ymm4,%ymm5,%ymm4
+	vpaddq	%ymm3,%ymm13,%ymm2
+	vpaddq	%ymm4,%ymm14,%ymm3
+	vpmuludq	84(%rax),%ymm0,%ymm4
+	vpmuludq	%ymm1,%ymm5,%ymm0
+	vmovdqa	64(%rcx),%ymm5
+	vpaddq	%ymm4,%ymm15,%ymm4
+	vpaddq	%ymm0,%ymm11,%ymm0
+
+	vpsrldq	$8,%ymm12,%ymm8
+	vpsrldq	$8,%ymm2,%ymm9
+	vpsrldq	$8,%ymm3,%ymm10
+	vpsrldq	$8,%ymm4,%ymm6
+	vpsrldq	$8,%ymm0,%ymm7
+	vpaddq	%ymm8,%ymm12,%ymm12
+	vpaddq	%ymm9,%ymm2,%ymm2
+	vpaddq	%ymm10,%ymm3,%ymm3
+	vpaddq	%ymm6,%ymm4,%ymm4
+	vpaddq	%ymm7,%ymm0,%ymm0
+
+	vpermq	$0x2,%ymm3,%ymm10
+	vpermq	$0x2,%ymm4,%ymm6
+	vpermq	$0x2,%ymm0,%ymm7
+	vpermq	$0x2,%ymm12,%ymm8
+	vpermq	$0x2,%ymm2,%ymm9
+	vpaddq	%ymm10,%ymm3,%ymm3
+	vpaddq	%ymm6,%ymm4,%ymm4
+	vpaddq	%ymm7,%ymm0,%ymm0
+	vpaddq	%ymm8,%ymm12,%ymm12
+	vpaddq	%ymm9,%ymm2,%ymm2
+
+	vpsrlq	$26,%ymm3,%ymm14
+	vpand	%ymm5,%ymm3,%ymm3
+	vpaddq	%ymm14,%ymm4,%ymm4
+
+	vpsrlq	$26,%ymm0,%ymm11
+	vpand	%ymm5,%ymm0,%ymm0
+	vpaddq	%ymm11,%ymm12,%ymm1
+
+	vpsrlq	$26,%ymm4,%ymm15
+	vpand	%ymm5,%ymm4,%ymm4
+
+	vpsrlq	$26,%ymm1,%ymm12
+	vpand	%ymm5,%ymm1,%ymm1
+	vpaddq	%ymm12,%ymm2,%ymm2
+
+	vpaddq	%ymm15,%ymm0,%ymm0
+	vpsllq	$2,%ymm15,%ymm15
+	vpaddq	%ymm15,%ymm0,%ymm0
+
+	vpsrlq	$26,%ymm2,%ymm13
+	vpand	%ymm5,%ymm2,%ymm2
+	vpaddq	%ymm13,%ymm3,%ymm3
+
+	vpsrlq	$26,%ymm0,%ymm11
+	vpand	%ymm5,%ymm0,%ymm0
+	vpaddq	%ymm11,%ymm1,%ymm1
+
+	vpsrlq	$26,%ymm3,%ymm14
+	vpand	%ymm5,%ymm3,%ymm3
+	vpaddq	%ymm14,%ymm4,%ymm4
+
+	vmovd	%xmm0,-112(%rdi)
+	vmovd	%xmm1,-108(%rdi)
+	vmovd	%xmm2,-104(%rdi)
+	vmovd	%xmm3,-100(%rdi)
+	vmovd	%xmm4,-96(%rdi)
+	leaq	-8(%r10),%rsp
+
+	vzeroupper
+	ret
+
+.Lblocks_avx512:
+
+	movl	$15,%eax
+	kmovw	%eax,%k2
+	leaq	8(%rsp),%r10
+
+	subq	$0x128,%rsp
+	leaq	.Lconst(%rip),%rcx
+	leaq	48+64(%rdi),%rdi
+	vmovdqa	96(%rcx),%ymm9
+
+	vmovdqu32	-64(%rdi),%zmm16{%k2}{z}
+	andq	$-512,%rsp
+	vmovdqu32	-48(%rdi),%zmm17{%k2}{z}
+	movq	$0x20,%rax
+	vmovdqu32	-32(%rdi),%zmm21{%k2}{z}
+	vmovdqu32	-16(%rdi),%zmm18{%k2}{z}
+	vmovdqu32	0(%rdi),%zmm22{%k2}{z}
+	vmovdqu32	16(%rdi),%zmm19{%k2}{z}
+	vmovdqu32	32(%rdi),%zmm23{%k2}{z}
+	vmovdqu32	48(%rdi),%zmm20{%k2}{z}
+	vmovdqu32	64(%rdi),%zmm24{%k2}{z}
+	vpermd	%zmm16,%zmm9,%zmm16
+	vpbroadcastq	64(%rcx),%zmm5
+	vpermd	%zmm17,%zmm9,%zmm17
+	vpermd	%zmm21,%zmm9,%zmm21
+	vpermd	%zmm18,%zmm9,%zmm18
+	vmovdqa64	%zmm16,0(%rsp){%k2}
+	vpsrlq	$32,%zmm16,%zmm7
+	vpermd	%zmm22,%zmm9,%zmm22
+	vmovdqu64	%zmm17,0(%rsp,%rax,1){%k2}
+	vpsrlq	$32,%zmm17,%zmm8
+	vpermd	%zmm19,%zmm9,%zmm19
+	vmovdqa64	%zmm21,64(%rsp){%k2}
+	vpermd	%zmm23,%zmm9,%zmm23
+	vpermd	%zmm20,%zmm9,%zmm20
+	vmovdqu64	%zmm18,64(%rsp,%rax,1){%k2}
+	vpermd	%zmm24,%zmm9,%zmm24
+	vmovdqa64	%zmm22,128(%rsp){%k2}
+	vmovdqu64	%zmm19,128(%rsp,%rax,1){%k2}
+	vmovdqa64	%zmm23,192(%rsp){%k2}
+	vmovdqu64	%zmm20,192(%rsp,%rax,1){%k2}
+	vmovdqa64	%zmm24,256(%rsp){%k2}
+
+	vpmuludq	%zmm7,%zmm16,%zmm11
+	vpmuludq	%zmm7,%zmm17,%zmm12
+	vpmuludq	%zmm7,%zmm18,%zmm13
+	vpmuludq	%zmm7,%zmm19,%zmm14
+	vpmuludq	%zmm7,%zmm20,%zmm15
+	vpsrlq	$32,%zmm18,%zmm9
+
+	vpmuludq	%zmm8,%zmm24,%zmm25
+	vpmuludq	%zmm8,%zmm16,%zmm26
+	vpmuludq	%zmm8,%zmm17,%zmm27
+	vpmuludq	%zmm8,%zmm18,%zmm28
+	vpmuludq	%zmm8,%zmm19,%zmm29
+	vpsrlq	$32,%zmm19,%zmm10
+	vpaddq	%zmm25,%zmm11,%zmm11
+	vpaddq	%zmm26,%zmm12,%zmm12
+	vpaddq	%zmm27,%zmm13,%zmm13
+	vpaddq	%zmm28,%zmm14,%zmm14
+	vpaddq	%zmm29,%zmm15,%zmm15
+
+	vpmuludq	%zmm9,%zmm23,%zmm25
+	vpmuludq	%zmm9,%zmm24,%zmm26
+	vpmuludq	%zmm9,%zmm17,%zmm28
+	vpmuludq	%zmm9,%zmm18,%zmm29
+	vpmuludq	%zmm9,%zmm16,%zmm27
+	vpsrlq	$32,%zmm20,%zmm6
+	vpaddq	%zmm25,%zmm11,%zmm11
+	vpaddq	%zmm26,%zmm12,%zmm12
+	vpaddq	%zmm28,%zmm14,%zmm14
+	vpaddq	%zmm29,%zmm15,%zmm15
+	vpaddq	%zmm27,%zmm13,%zmm13
+
+	vpmuludq	%zmm10,%zmm22,%zmm25
+	vpmuludq	%zmm10,%zmm16,%zmm28
+	vpmuludq	%zmm10,%zmm17,%zmm29
+	vpmuludq	%zmm10,%zmm23,%zmm26
+	vpmuludq	%zmm10,%zmm24,%zmm27
+	vpaddq	%zmm25,%zmm11,%zmm11
+	vpaddq	%zmm28,%zmm14,%zmm14
+	vpaddq	%zmm29,%zmm15,%zmm15
+	vpaddq	%zmm26,%zmm12,%zmm12
+	vpaddq	%zmm27,%zmm13,%zmm13
+
+	vpmuludq	%zmm6,%zmm24,%zmm28
+	vpmuludq	%zmm6,%zmm16,%zmm29
+	vpmuludq	%zmm6,%zmm21,%zmm25
+	vpmuludq	%zmm6,%zmm22,%zmm26
+	vpmuludq	%zmm6,%zmm23,%zmm27
+	vpaddq	%zmm28,%zmm14,%zmm14
+	vpaddq	%zmm29,%zmm15,%zmm15
+	vpaddq	%zmm25,%zmm11,%zmm11
+	vpaddq	%zmm26,%zmm12,%zmm12
+	vpaddq	%zmm27,%zmm13,%zmm13
+
+	vmovdqu64	0(%rsi),%zmm10
+	vmovdqu64	64(%rsi),%zmm6
+	leaq	128(%rsi),%rsi
+
+	vpsrlq	$26,%zmm14,%zmm28
+	vpandq	%zmm5,%zmm14,%zmm14
+	vpaddq	%zmm28,%zmm15,%zmm15
+
+	vpsrlq	$26,%zmm11,%zmm25
+	vpandq	%zmm5,%zmm11,%zmm11
+	vpaddq	%zmm25,%zmm12,%zmm12
+
+	vpsrlq	$26,%zmm15,%zmm29
+	vpandq	%zmm5,%zmm15,%zmm15
+
+	vpsrlq	$26,%zmm12,%zmm26
+	vpandq	%zmm5,%zmm12,%zmm12
+	vpaddq	%zmm26,%zmm13,%zmm13
+
+	vpaddq	%zmm29,%zmm11,%zmm11
+	vpsllq	$2,%zmm29,%zmm29
+	vpaddq	%zmm29,%zmm11,%zmm11
+
+	vpsrlq	$26,%zmm13,%zmm27
+	vpandq	%zmm5,%zmm13,%zmm13
+	vpaddq	%zmm27,%zmm14,%zmm14
+
+	vpsrlq	$26,%zmm11,%zmm25
+	vpandq	%zmm5,%zmm11,%zmm11
+	vpaddq	%zmm25,%zmm12,%zmm12
+
+	vpsrlq	$26,%zmm14,%zmm28
+	vpandq	%zmm5,%zmm14,%zmm14
+	vpaddq	%zmm28,%zmm15,%zmm15
+
+	vpunpcklqdq	%zmm6,%zmm10,%zmm7
+	vpunpckhqdq	%zmm6,%zmm10,%zmm6
+
+	vmovdqa32	128(%rcx),%zmm25
+	movl	$0x7777,%eax
+	kmovw	%eax,%k1
+
+	vpermd	%zmm16,%zmm25,%zmm16
+	vpermd	%zmm17,%zmm25,%zmm17
+	vpermd	%zmm18,%zmm25,%zmm18
+	vpermd	%zmm19,%zmm25,%zmm19
+	vpermd	%zmm20,%zmm25,%zmm20
+
+	vpermd	%zmm11,%zmm25,%zmm16{%k1}
+	vpermd	%zmm12,%zmm25,%zmm17{%k1}
+	vpermd	%zmm13,%zmm25,%zmm18{%k1}
+	vpermd	%zmm14,%zmm25,%zmm19{%k1}
+	vpermd	%zmm15,%zmm25,%zmm20{%k1}
+
+	vpslld	$2,%zmm17,%zmm21
+	vpslld	$2,%zmm18,%zmm22
+	vpslld	$2,%zmm19,%zmm23
+	vpslld	$2,%zmm20,%zmm24
+	vpaddd	%zmm17,%zmm21,%zmm21
+	vpaddd	%zmm18,%zmm22,%zmm22
+	vpaddd	%zmm19,%zmm23,%zmm23
+	vpaddd	%zmm20,%zmm24,%zmm24
+
+	vpbroadcastq	32(%rcx),%zmm30
+
+	vpsrlq	$52,%zmm7,%zmm9
+	vpsllq	$12,%zmm6,%zmm10
+	vporq	%zmm10,%zmm9,%zmm9
+	vpsrlq	$26,%zmm7,%zmm8
+	vpsrlq	$14,%zmm6,%zmm10
+	vpsrlq	$40,%zmm6,%zmm6
+	vpandq	%zmm5,%zmm9,%zmm9
+	vpandq	%zmm5,%zmm7,%zmm7
+
+	vpaddq	%zmm2,%zmm9,%zmm2
+	subq	$192,%rdx
+	jbe	.Ltail_avx512
+	jmp	.Loop_avx512
+
+.align	32
+.Loop_avx512:
+
+	vpmuludq	%zmm2,%zmm17,%zmm14
+	vpaddq	%zmm0,%zmm7,%zmm0
+	vpmuludq	%zmm2,%zmm18,%zmm15
+	vpandq	%zmm5,%zmm8,%zmm8
+	vpmuludq	%zmm2,%zmm23,%zmm11
+	vpandq	%zmm5,%zmm10,%zmm10
+	vpmuludq	%zmm2,%zmm24,%zmm12
+	vporq	%zmm30,%zmm6,%zmm6
+	vpmuludq	%zmm2,%zmm16,%zmm13
+	vpaddq	%zmm1,%zmm8,%zmm1
+	vpaddq	%zmm3,%zmm10,%zmm3
+	vpaddq	%zmm4,%zmm6,%zmm4
+
+	vmovdqu64	0(%rsi),%zmm10
+	vmovdqu64	64(%rsi),%zmm6
+	leaq	128(%rsi),%rsi
+	vpmuludq	%zmm0,%zmm19,%zmm28
+	vpmuludq	%zmm0,%zmm20,%zmm29
+	vpmuludq	%zmm0,%zmm16,%zmm25
+	vpmuludq	%zmm0,%zmm17,%zmm26
+	vpaddq	%zmm28,%zmm14,%zmm14
+	vpaddq	%zmm29,%zmm15,%zmm15
+	vpaddq	%zmm25,%zmm11,%zmm11
+	vpaddq	%zmm26,%zmm12,%zmm12
+
+	vpmuludq	%zmm1,%zmm18,%zmm28
+	vpmuludq	%zmm1,%zmm19,%zmm29
+	vpmuludq	%zmm1,%zmm24,%zmm25
+	vpmuludq	%zmm0,%zmm18,%zmm27
+	vpaddq	%zmm28,%zmm14,%zmm14
+	vpaddq	%zmm29,%zmm15,%zmm15
+	vpaddq	%zmm25,%zmm11,%zmm11
+	vpaddq	%zmm27,%zmm13,%zmm13
+
+	vpunpcklqdq	%zmm6,%zmm10,%zmm7
+	vpunpckhqdq	%zmm6,%zmm10,%zmm6
+
+	vpmuludq	%zmm3,%zmm16,%zmm28
+	vpmuludq	%zmm3,%zmm17,%zmm29
+	vpmuludq	%zmm1,%zmm16,%zmm26
+	vpmuludq	%zmm1,%zmm17,%zmm27
+	vpaddq	%zmm28,%zmm14,%zmm14
+	vpaddq	%zmm29,%zmm15,%zmm15
+	vpaddq	%zmm26,%zmm12,%zmm12
+	vpaddq	%zmm27,%zmm13,%zmm13
+
+	vpmuludq	%zmm4,%zmm24,%zmm28
+	vpmuludq	%zmm4,%zmm16,%zmm29
+	vpmuludq	%zmm3,%zmm22,%zmm25
+	vpmuludq	%zmm3,%zmm23,%zmm26
+	vpaddq	%zmm28,%zmm14,%zmm14
+	vpmuludq	%zmm3,%zmm24,%zmm27
+	vpaddq	%zmm29,%zmm15,%zmm15
+	vpaddq	%zmm25,%zmm11,%zmm11
+	vpaddq	%zmm26,%zmm12,%zmm12
+	vpaddq	%zmm27,%zmm13,%zmm13
+
+	vpmuludq	%zmm4,%zmm21,%zmm25
+	vpmuludq	%zmm4,%zmm22,%zmm26
+	vpmuludq	%zmm4,%zmm23,%zmm27
+	vpaddq	%zmm25,%zmm11,%zmm0
+	vpaddq	%zmm26,%zmm12,%zmm1
+	vpaddq	%zmm27,%zmm13,%zmm2
+
+	vpsrlq	$52,%zmm7,%zmm9
+	vpsllq	$12,%zmm6,%zmm10
+
+	vpsrlq	$26,%zmm14,%zmm3
+	vpandq	%zmm5,%zmm14,%zmm14
+	vpaddq	%zmm3,%zmm15,%zmm4
+
+	vporq	%zmm10,%zmm9,%zmm9
+
+	vpsrlq	$26,%zmm0,%zmm11
+	vpandq	%zmm5,%zmm0,%zmm0
+	vpaddq	%zmm11,%zmm1,%zmm1
+
+	vpandq	%zmm5,%zmm9,%zmm9
+
+	vpsrlq	$26,%zmm4,%zmm15
+	vpandq	%zmm5,%zmm4,%zmm4
+
+	vpsrlq	$26,%zmm1,%zmm12
+	vpandq	%zmm5,%zmm1,%zmm1
+	vpaddq	%zmm12,%zmm2,%zmm2
+
+	vpaddq	%zmm15,%zmm0,%zmm0
+	vpsllq	$2,%zmm15,%zmm15
+	vpaddq	%zmm15,%zmm0,%zmm0
+
+	vpaddq	%zmm9,%zmm2,%zmm2
+	vpsrlq	$26,%zmm7,%zmm8
+
+	vpsrlq	$26,%zmm2,%zmm13
+	vpandq	%zmm5,%zmm2,%zmm2
+	vpaddq	%zmm13,%zmm14,%zmm3
+
+	vpsrlq	$14,%zmm6,%zmm10
+
+	vpsrlq	$26,%zmm0,%zmm11
+	vpandq	%zmm5,%zmm0,%zmm0
+	vpaddq	%zmm11,%zmm1,%zmm1
+
+	vpsrlq	$40,%zmm6,%zmm6
+
+	vpsrlq	$26,%zmm3,%zmm14
+	vpandq	%zmm5,%zmm3,%zmm3
+	vpaddq	%zmm14,%zmm4,%zmm4
+
+	vpandq	%zmm5,%zmm7,%zmm7
+
+	subq	$128,%rdx
+	ja	.Loop_avx512
+
+.Ltail_avx512:
+
+	vpsrlq	$32,%zmm16,%zmm16
+	vpsrlq	$32,%zmm17,%zmm17
+	vpsrlq	$32,%zmm18,%zmm18
+	vpsrlq	$32,%zmm23,%zmm23
+	vpsrlq	$32,%zmm24,%zmm24
+	vpsrlq	$32,%zmm19,%zmm19
+	vpsrlq	$32,%zmm20,%zmm20
+	vpsrlq	$32,%zmm21,%zmm21
+	vpsrlq	$32,%zmm22,%zmm22
+
+	leaq	(%rsi,%rdx,1),%rsi
+
+	vpaddq	%zmm0,%zmm7,%zmm0
+
+	vpmuludq	%zmm2,%zmm17,%zmm14
+	vpmuludq	%zmm2,%zmm18,%zmm15
+	vpmuludq	%zmm2,%zmm23,%zmm11
+	vpandq	%zmm5,%zmm8,%zmm8
+	vpmuludq	%zmm2,%zmm24,%zmm12
+	vpandq	%zmm5,%zmm10,%zmm10
+	vpmuludq	%zmm2,%zmm16,%zmm13
+	vporq	%zmm30,%zmm6,%zmm6
+	vpaddq	%zmm1,%zmm8,%zmm1
+	vpaddq	%zmm3,%zmm10,%zmm3
+	vpaddq	%zmm4,%zmm6,%zmm4
+
+	vmovdqu	0(%rsi),%xmm7
+	vpmuludq	%zmm0,%zmm19,%zmm28
+	vpmuludq	%zmm0,%zmm20,%zmm29
+	vpmuludq	%zmm0,%zmm16,%zmm25
+	vpmuludq	%zmm0,%zmm17,%zmm26
+	vpaddq	%zmm28,%zmm14,%zmm14
+	vpaddq	%zmm29,%zmm15,%zmm15
+	vpaddq	%zmm25,%zmm11,%zmm11
+	vpaddq	%zmm26,%zmm12,%zmm12
+
+	vmovdqu	16(%rsi),%xmm8
+	vpmuludq	%zmm1,%zmm18,%zmm28
+	vpmuludq	%zmm1,%zmm19,%zmm29
+	vpmuludq	%zmm1,%zmm24,%zmm25
+	vpmuludq	%zmm0,%zmm18,%zmm27
+	vpaddq	%zmm28,%zmm14,%zmm14
+	vpaddq	%zmm29,%zmm15,%zmm15
+	vpaddq	%zmm25,%zmm11,%zmm11
+	vpaddq	%zmm27,%zmm13,%zmm13
+
+	vinserti128	$1,32(%rsi),%ymm7,%ymm7
+	vpmuludq	%zmm3,%zmm16,%zmm28
+	vpmuludq	%zmm3,%zmm17,%zmm29
+	vpmuludq	%zmm1,%zmm16,%zmm26
+	vpmuludq	%zmm1,%zmm17,%zmm27
+	vpaddq	%zmm28,%zmm14,%zmm14
+	vpaddq	%zmm29,%zmm15,%zmm15
+	vpaddq	%zmm26,%zmm12,%zmm12
+	vpaddq	%zmm27,%zmm13,%zmm13
+
+	vinserti128	$1,48(%rsi),%ymm8,%ymm8
+	vpmuludq	%zmm4,%zmm24,%zmm28
+	vpmuludq	%zmm4,%zmm16,%zmm29
+	vpmuludq	%zmm3,%zmm22,%zmm25
+	vpmuludq	%zmm3,%zmm23,%zmm26
+	vpmuludq	%zmm3,%zmm24,%zmm27
+	vpaddq	%zmm28,%zmm14,%zmm3
+	vpaddq	%zmm29,%zmm15,%zmm15
+	vpaddq	%zmm25,%zmm11,%zmm11
+	vpaddq	%zmm26,%zmm12,%zmm12
+	vpaddq	%zmm27,%zmm13,%zmm13
+
+	vpmuludq	%zmm4,%zmm21,%zmm25
+	vpmuludq	%zmm4,%zmm22,%zmm26
+	vpmuludq	%zmm4,%zmm23,%zmm27
+	vpaddq	%zmm25,%zmm11,%zmm0
+	vpaddq	%zmm26,%zmm12,%zmm1
+	vpaddq	%zmm27,%zmm13,%zmm2
+
+	movl	$1,%eax
+	vpermq	$0xb1,%zmm3,%zmm14
+	vpermq	$0xb1,%zmm15,%zmm4
+	vpermq	$0xb1,%zmm0,%zmm11
+	vpermq	$0xb1,%zmm1,%zmm12
+	vpermq	$0xb1,%zmm2,%zmm13
+	vpaddq	%zmm14,%zmm3,%zmm3
+	vpaddq	%zmm15,%zmm4,%zmm4
+	vpaddq	%zmm11,%zmm0,%zmm0
+	vpaddq	%zmm12,%zmm1,%zmm1
+	vpaddq	%zmm13,%zmm2,%zmm2
+
+	kmovw	%eax,%k3
+	vpermq	$0x2,%zmm3,%zmm14
+	vpermq	$0x2,%zmm4,%zmm15
+	vpermq	$0x2,%zmm0,%zmm11
+	vpermq	$0x2,%zmm1,%zmm12
+	vpermq	$0x2,%zmm2,%zmm13
+	vpaddq	%zmm14,%zmm3,%zmm3
+	vpaddq	%zmm15,%zmm4,%zmm4
+	vpaddq	%zmm11,%zmm0,%zmm0
+	vpaddq	%zmm12,%zmm1,%zmm1
+	vpaddq	%zmm13,%zmm2,%zmm2
+
+	vextracti64x4	$0x1,%zmm3,%ymm14
+	vextracti64x4	$0x1,%zmm4,%ymm15
+	vextracti64x4	$0x1,%zmm0,%ymm11
+	vextracti64x4	$0x1,%zmm1,%ymm12
+	vextracti64x4	$0x1,%zmm2,%ymm13
+	vpaddq	%zmm14,%zmm3,%zmm3{%k3}{z}
+	vpaddq	%zmm15,%zmm4,%zmm4{%k3}{z}
+	vpaddq	%zmm11,%zmm0,%zmm0{%k3}{z}
+	vpaddq	%zmm12,%zmm1,%zmm1{%k3}{z}
+	vpaddq	%zmm13,%zmm2,%zmm2{%k3}{z}
+
+	vpsrlq	$26,%ymm3,%ymm14
+	vpand	%ymm5,%ymm3,%ymm3
+	vpsrldq	$6,%ymm7,%ymm9
+	vpsrldq	$6,%ymm8,%ymm10
+	vpunpckhqdq	%ymm8,%ymm7,%ymm6
+	vpaddq	%ymm14,%ymm4,%ymm4
+
+	vpsrlq	$26,%ymm0,%ymm11
+	vpand	%ymm5,%ymm0,%ymm0
+	vpunpcklqdq	%ymm10,%ymm9,%ymm9
+	vpunpcklqdq	%ymm8,%ymm7,%ymm7
+	vpaddq	%ymm11,%ymm1,%ymm1
+
+	vpsrlq	$26,%ymm4,%ymm15
+	vpand	%ymm5,%ymm4,%ymm4
+
+	vpsrlq	$26,%ymm1,%ymm12
+	vpand	%ymm5,%ymm1,%ymm1
+	vpsrlq	$30,%ymm9,%ymm10
+	vpsrlq	$4,%ymm9,%ymm9
+	vpaddq	%ymm12,%ymm2,%ymm2
+
+	vpaddq	%ymm15,%ymm0,%ymm0
+	vpsllq	$2,%ymm15,%ymm15
+	vpsrlq	$26,%ymm7,%ymm8
+	vpsrlq	$40,%ymm6,%ymm6
+	vpaddq	%ymm15,%ymm0,%ymm0
+
+	vpsrlq	$26,%ymm2,%ymm13
+	vpand	%ymm5,%ymm2,%ymm2
+	vpand	%ymm5,%ymm9,%ymm9
+	vpand	%ymm5,%ymm7,%ymm7
+	vpaddq	%ymm13,%ymm3,%ymm3
+
+	vpsrlq	$26,%ymm0,%ymm11
+	vpand	%ymm5,%ymm0,%ymm0
+	vpaddq	%ymm2,%ymm9,%ymm2
+	vpand	%ymm5,%ymm8,%ymm8
+	vpaddq	%ymm11,%ymm1,%ymm1
+
+	vpsrlq	$26,%ymm3,%ymm14
+	vpand	%ymm5,%ymm3,%ymm3
+	vpand	%ymm5,%ymm10,%ymm10
+	vpor	32(%rcx),%ymm6,%ymm6
+	vpaddq	%ymm14,%ymm4,%ymm4
+
+	leaq	144(%rsp),%rax
+	addq	$64,%rdx
+	jnz	.Ltail_avx2_512
+
+	vpsubq	%ymm9,%ymm2,%ymm2
+	vmovd	%xmm0,-112(%rdi)
+	vmovd	%xmm1,-108(%rdi)
+	vmovd	%xmm2,-104(%rdi)
+	vmovd	%xmm3,-100(%rdi)
+	vmovd	%xmm4,-96(%rdi)
+	vzeroall
+	leaq	-8(%r10),%rsp
+
+	ret
+
+ENDPROC(poly1305_blocks_avx512)
+#endif /* CONFIG_AS_AVX512 */
diff --git b/net/wireguard/device.c b/net/wireguard/device.c
new file mode 100644
index 0000000..31417ea
--- /dev/null
+++ b/net/wireguard/device.c
@@ -0,0 +1,415 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#include "queueing.h"
+#include "socket.h"
+#include "timers.h"
+#include "device.h"
+#include "ratelimiter.h"
+#include "peer.h"
+#include "messages.h"
+
+#include <linux/module.h>
+#include <linux/rtnetlink.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/inetdevice.h>
+#include <linux/if_arp.h>
+#include <linux/icmp.h>
+#include <linux/suspend.h>
+#include <net/icmp.h>
+#include <net/rtnetlink.h>
+#include <net/ip_tunnels.h>
+#include <net/addrconf.h>
+
+static LIST_HEAD(device_list);
+
+static int open(struct net_device *dev)
+{
+	int ret;
+	struct wireguard_peer *peer;
+	struct wireguard_device *wg = netdev_priv(dev);
+#ifndef COMPAT_CANNOT_USE_IN6_DEV_GET
+	struct inet6_dev *dev_v6 = __in6_dev_get(dev);
+#endif
+	struct in_device *dev_v4 = __in_dev_get_rtnl(dev);
+
+	if (dev_v4) {
+		/* TODO: when we merge to mainline, put this check near the ip_rt_send_redirect
+		 * call of ip_forward in net/ipv4/ip_forward.c, similar to the current secpath
+		 * check, rather than turning it off like this. This is just a stop gap solution
+		 * while we're an out of tree module.
+		 */
+		IN_DEV_CONF_SET(dev_v4, SEND_REDIRECTS, false);
+		IPV4_DEVCONF_ALL(dev_net(dev), SEND_REDIRECTS) = false;
+	}
+#ifndef COMPAT_CANNOT_USE_IN6_DEV_GET
+	if (dev_v6)
+#ifndef COMPAT_CANNOT_USE_DEV_CNF
+		dev_v6->cnf.addr_gen_mode = IN6_ADDR_GEN_MODE_NONE;
+#else
+		dev_v6->addr_gen_mode = IN6_ADDR_GEN_MODE_NONE;
+#endif
+#endif
+
+	ret = socket_init(wg, wg->incoming_port);
+	if (ret < 0)
+		return ret;
+	mutex_lock(&wg->device_update_lock);
+	list_for_each_entry(peer, &wg->peer_list, peer_list) {
+		packet_send_staged_packets(peer);
+		if (peer->persistent_keepalive_interval)
+			packet_send_keepalive(peer);
+	}
+	mutex_unlock(&wg->device_update_lock);
+	return 0;
+}
+
+#if defined(CONFIG_PM_SLEEP) && !defined(CONFIG_ANDROID)
+static int pm_notification(struct notifier_block *nb, unsigned long action, void *data)
+{
+	struct wireguard_device *wg;
+	struct wireguard_peer *peer;
+
+	if (action != PM_HIBERNATION_PREPARE && action != PM_SUSPEND_PREPARE)
+		return 0;
+
+	rtnl_lock();
+	list_for_each_entry(wg, &device_list, device_list) {
+		mutex_lock(&wg->device_update_lock);
+		list_for_each_entry(peer, &wg->peer_list, peer_list) {
+			noise_handshake_clear(&peer->handshake);
+			noise_keypairs_clear(&peer->keypairs);
+			if (peer->timers_enabled)
+				del_timer(&peer->timer_zero_key_material);
+		}
+		mutex_unlock(&wg->device_update_lock);
+	}
+	rtnl_unlock();
+	rcu_barrier_bh();
+	return 0;
+}
+static struct notifier_block pm_notifier = { .notifier_call = pm_notification };
+#endif
+
+static int stop(struct net_device *dev)
+{
+	struct wireguard_device *wg = netdev_priv(dev);
+	struct wireguard_peer *peer;
+
+	mutex_lock(&wg->device_update_lock);
+	list_for_each_entry(peer, &wg->peer_list, peer_list) {
+		skb_queue_purge(&peer->staged_packet_queue);
+		timers_stop(peer);
+		noise_handshake_clear(&peer->handshake);
+		noise_keypairs_clear(&peer->keypairs);
+		peer->last_sent_handshake = get_jiffies_64() - REKEY_TIMEOUT - HZ;
+	}
+	mutex_unlock(&wg->device_update_lock);
+	skb_queue_purge(&wg->incoming_handshakes);
+	socket_reinit(wg, NULL, NULL);
+	return 0;
+}
+
+static netdev_tx_t xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct wireguard_device *wg = netdev_priv(dev);
+	struct wireguard_peer *peer;
+	struct sk_buff *next;
+	struct sk_buff_head packets;
+	sa_family_t family;
+	int ret;
+
+	if (unlikely(skb_examine_untrusted_ip_hdr(skb) != skb->protocol)) {
+		ret = -EPROTONOSUPPORT;
+		net_dbg_ratelimited("%s: Invalid IP packet\n", dev->name);
+		goto err;
+	}
+
+	peer = allowedips_lookup_dst(&wg->peer_allowedips, skb);
+	if (unlikely(!peer)) {
+		ret = -ENOKEY;
+		net_dbg_skb_ratelimited("%s: No peer is configured for %pISc\n", dev->name, skb);
+		goto err;
+	}
+
+	family = READ_ONCE(peer->endpoint.addr.sa_family);
+	if (unlikely(family != AF_INET && family != AF_INET6)) {
+		ret = -EDESTADDRREQ;
+		net_dbg_ratelimited("%s: No valid endpoint has been configured or discovered for peer %llu\n", dev->name, peer->internal_id);
+		goto err_peer;
+	}
+
+	__skb_queue_head_init(&packets);
+	if (!skb_is_gso(skb))
+		skb->next = NULL;
+	else {
+		struct sk_buff *segs = skb_gso_segment(skb, 0);
+
+		if (unlikely(IS_ERR(segs))) {
+			ret = PTR_ERR(segs);
+			goto err_peer;
+		}
+		dev_kfree_skb(skb);
+		skb = segs;
+	}
+	do {
+		next = skb->next;
+		skb->next = skb->prev = NULL;
+
+		skb = skb_share_check(skb, GFP_ATOMIC);
+		if (unlikely(!skb))
+			continue;
+
+		/* We only need to keep the original dst around for icmp,
+		 * so at this point we're in a position to drop it.
+		 */
+		skb_dst_drop(skb);
+
+		__skb_queue_tail(&packets, skb);
+	} while ((skb = next) != NULL);
+
+	spin_lock_bh(&peer->staged_packet_queue.lock);
+	/* If the queue is getting too big, we start removing the oldest packets until it's small again.
+	 * We do this before adding the new packet, so we don't remove GSO segments that are in excess.
+	 */
+	while (skb_queue_len(&peer->staged_packet_queue) > MAX_STAGED_PACKETS)
+		dev_kfree_skb(__skb_dequeue(&peer->staged_packet_queue));
+	skb_queue_splice_tail(&packets, &peer->staged_packet_queue);
+	spin_unlock_bh(&peer->staged_packet_queue.lock);
+
+	packet_send_staged_packets(peer);
+
+	peer_put(peer);
+	return NETDEV_TX_OK;
+
+err_peer:
+	peer_put(peer);
+err:
+	++dev->stats.tx_errors;
+	if (skb->protocol == htons(ETH_P_IP))
+		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
+	else if (skb->protocol == htons(ETH_P_IPV6))
+		icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
+	kfree_skb(skb);
+	return ret;
+}
+
+static const struct net_device_ops netdev_ops = {
+	.ndo_open		= open,
+	.ndo_stop		= stop,
+	.ndo_start_xmit		= xmit,
+	.ndo_get_stats64	= ip_tunnel_get_stats64
+};
+
+static void destruct(struct net_device *dev)
+{
+	struct wireguard_device *wg = netdev_priv(dev);
+
+	rtnl_lock();
+	list_del(&wg->device_list);
+	rtnl_unlock();
+	mutex_lock(&wg->device_update_lock);
+	wg->incoming_port = 0;
+	socket_reinit(wg, NULL, NULL);
+	peer_remove_all(wg); /* The final references are cleared in the below calls to destroy_workqueue. */
+	destroy_workqueue(wg->handshake_receive_wq);
+	destroy_workqueue(wg->handshake_send_wq);
+	packet_queue_free(&wg->decrypt_queue, true);
+	packet_queue_free(&wg->encrypt_queue, true);
+	destroy_workqueue(wg->packet_crypt_wq);
+	rcu_barrier_bh(); /* Wait for all the peers to be actually freed. */
+	allowedips_free(&wg->peer_allowedips, &wg->device_update_lock);
+	ratelimiter_uninit();
+	memzero_explicit(&wg->static_identity, sizeof(struct noise_static_identity));
+	skb_queue_purge(&wg->incoming_handshakes);
+	free_percpu(dev->tstats);
+	free_percpu(wg->incoming_handshakes_worker);
+	if (wg->have_creating_net_ref)
+		put_net(wg->creating_net);
+	mutex_unlock(&wg->device_update_lock);
+
+	pr_debug("%s: Interface deleted\n", dev->name);
+	free_netdev(dev);
+}
+
+static void setup(struct net_device *dev)
+{
+	struct wireguard_device *wg = netdev_priv(dev);
+	enum { WG_NETDEV_FEATURES = NETIF_F_HW_CSUM | NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_GSO | NETIF_F_GSO_SOFTWARE | NETIF_F_HIGHDMA };
+
+	dev->netdev_ops = &netdev_ops;
+	dev->hard_header_len = 0;
+	dev->addr_len = 0;
+	dev->needed_headroom = DATA_PACKET_HEAD_ROOM;
+	dev->needed_tailroom = noise_encrypted_len(MESSAGE_PADDING_MULTIPLE);
+	dev->type = ARPHRD_NONE;
+	dev->flags = IFF_POINTOPOINT | IFF_NOARP;
+#ifndef COMPAT_CANNOT_USE_IFF_NO_QUEUE
+	dev->priv_flags |= IFF_NO_QUEUE;
+#else
+	dev->tx_queue_len = 0;
+#endif
+	dev->features |= NETIF_F_LLTX;
+	dev->features |= WG_NETDEV_FEATURES;
+	dev->hw_features |= WG_NETDEV_FEATURES;
+	dev->hw_enc_features |= WG_NETDEV_FEATURES;
+	dev->mtu = ETH_DATA_LEN - MESSAGE_MINIMUM_LENGTH - sizeof(struct udphdr) - max(sizeof(struct ipv6hdr), sizeof(struct iphdr));
+
+	/* We need to keep the dst around in case of icmp replies. */
+	netif_keep_dst(dev);
+
+	memset(wg, 0, sizeof(struct wireguard_device));
+	wg->dev = dev;
+}
+
+static int newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack)
+{
+	int ret = -ENOMEM;
+	struct wireguard_device *wg = netdev_priv(dev);
+
+	wg->creating_net = src_net;
+	init_rwsem(&wg->static_identity.lock);
+	mutex_init(&wg->socket_update_lock);
+	mutex_init(&wg->device_update_lock);
+	skb_queue_head_init(&wg->incoming_handshakes);
+	pubkey_hashtable_init(&wg->peer_hashtable);
+	index_hashtable_init(&wg->index_hashtable);
+	allowedips_init(&wg->peer_allowedips);
+	cookie_checker_init(&wg->cookie_checker, wg);
+	INIT_LIST_HEAD(&wg->peer_list);
+	wg->device_update_gen = 1;
+
+	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
+	if (!dev->tstats)
+		goto error_1;
+
+	wg->incoming_handshakes_worker = packet_alloc_percpu_multicore_worker(packet_handshake_receive_worker, wg);
+	if (!wg->incoming_handshakes_worker)
+		goto error_2;
+
+	wg->handshake_receive_wq = alloc_workqueue("wg-kex-%s", WQ_CPU_INTENSIVE | WQ_FREEZABLE, 0, dev->name);
+	if (!wg->handshake_receive_wq)
+		goto error_3;
+
+	wg->handshake_send_wq = alloc_workqueue("wg-kex-%s", WQ_UNBOUND | WQ_FREEZABLE, 0, dev->name);
+	if (!wg->handshake_send_wq)
+		goto error_4;
+
+	wg->packet_crypt_wq = alloc_workqueue("wg-crypt-%s", WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM, 0, dev->name);
+	if (!wg->packet_crypt_wq)
+		goto error_5;
+
+	if (packet_queue_init(&wg->encrypt_queue, packet_encrypt_worker, true, MAX_QUEUED_PACKETS) < 0)
+		goto error_6;
+
+	if (packet_queue_init(&wg->decrypt_queue, packet_decrypt_worker, true, MAX_QUEUED_PACKETS) < 0)
+		goto error_7;
+
+	ret = ratelimiter_init();
+	if (ret < 0)
+		goto error_8;
+
+	ret = register_netdevice(dev);
+	if (ret < 0)
+		goto error_9;
+
+	list_add(&wg->device_list, &device_list);
+
+	/* We wait until the end to assign priv_destructor, so that register_netdevice doesn't
+	 * call it for us if it fails.
+	 */
+	dev->priv_destructor = destruct;
+
+	pr_debug("%s: Interface created\n", dev->name);
+	return ret;
+
+error_9:
+	ratelimiter_uninit();
+error_8:
+	packet_queue_free(&wg->decrypt_queue, true);
+error_7:
+	packet_queue_free(&wg->encrypt_queue, true);
+error_6:
+	destroy_workqueue(wg->packet_crypt_wq);
+error_5:
+	destroy_workqueue(wg->handshake_send_wq);
+error_4:
+	destroy_workqueue(wg->handshake_receive_wq);
+error_3:
+	free_percpu(wg->incoming_handshakes_worker);
+error_2:
+	free_percpu(dev->tstats);
+error_1:
+	return ret;
+}
+
+static struct rtnl_link_ops link_ops __read_mostly = {
+	.kind			= KBUILD_MODNAME,
+	.priv_size		= sizeof(struct wireguard_device),
+	.setup			= setup,
+	.newlink		= newlink,
+};
+
+static int netdevice_notification(struct notifier_block *nb, unsigned long action, void *data)
+{
+	struct net_device *dev = ((struct netdev_notifier_info *)data)->dev;
+	struct wireguard_device *wg = netdev_priv(dev);
+
+	ASSERT_RTNL();
+
+	if (action != NETDEV_REGISTER || dev->netdev_ops != &netdev_ops)
+		return 0;
+
+	if (dev_net(dev) == wg->creating_net && wg->have_creating_net_ref) {
+		put_net(wg->creating_net);
+		wg->have_creating_net_ref = false;
+	} else if (dev_net(dev) != wg->creating_net && !wg->have_creating_net_ref) {
+		wg->have_creating_net_ref = true;
+		get_net(wg->creating_net);
+	}
+	return 0;
+}
+
+static struct notifier_block netdevice_notifier = { .notifier_call = netdevice_notification };
+
+int __init device_init(void)
+{
+	int ret;
+
+#if defined(CONFIG_PM_SLEEP) && !defined(CONFIG_ANDROID)
+	ret = register_pm_notifier(&pm_notifier);
+	if (ret)
+		return ret;
+#endif
+
+	ret = register_netdevice_notifier(&netdevice_notifier);
+	if (ret)
+		goto error_pm;
+
+	ret = rtnl_link_register(&link_ops);
+	if (ret)
+		goto error_netdevice;
+
+	return 0;
+
+error_netdevice:
+	unregister_netdevice_notifier(&netdevice_notifier);
+error_pm:
+#if defined(CONFIG_PM_SLEEP) && !defined(CONFIG_ANDROID)
+	unregister_pm_notifier(&pm_notifier);
+#endif
+	return ret;
+}
+
+void device_uninit(void)
+{
+	rtnl_link_unregister(&link_ops);
+	unregister_netdevice_notifier(&netdevice_notifier);
+#if defined(CONFIG_PM_SLEEP) && !defined(CONFIG_ANDROID)
+	unregister_pm_notifier(&pm_notifier);
+#endif
+	rcu_barrier_bh();
+}
diff --git b/net/wireguard/device.h b/net/wireguard/device.h
new file mode 100644
index 0000000..08803e8
--- /dev/null
+++ b/net/wireguard/device.h
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#ifndef _WG_DEVICE_H
+#define _WG_DEVICE_H
+
+#include "noise.h"
+#include "allowedips.h"
+#include "hashtables.h"
+#include "cookie.h"
+
+#include <linux/types.h>
+#include <linux/netdevice.h>
+#include <linux/workqueue.h>
+#include <linux/mutex.h>
+#include <linux/net.h>
+#include <linux/ptr_ring.h>
+
+struct wireguard_device;
+
+struct multicore_worker {
+	void *ptr;
+	struct work_struct work;
+};
+
+struct crypt_queue {
+	struct ptr_ring ring;
+	union {
+		struct {
+			struct multicore_worker __percpu *worker;
+			int last_cpu;
+		};
+		struct work_struct work;
+	};
+};
+
+struct wireguard_device {
+	struct net_device *dev;
+	struct crypt_queue encrypt_queue, decrypt_queue;
+	struct sock __rcu *sock4, *sock6;
+	struct net *creating_net;
+	struct noise_static_identity static_identity;
+	struct workqueue_struct *handshake_receive_wq, *handshake_send_wq, *packet_crypt_wq;
+	struct sk_buff_head incoming_handshakes;
+	int incoming_handshake_cpu;
+	struct multicore_worker __percpu *incoming_handshakes_worker;
+	struct cookie_checker cookie_checker;
+	struct pubkey_hashtable peer_hashtable;
+	struct index_hashtable index_hashtable;
+	struct allowedips peer_allowedips;
+	struct mutex device_update_lock, socket_update_lock;
+	struct list_head device_list, peer_list;
+	unsigned int num_peers, device_update_gen;
+	u32 fwmark;
+	u16 incoming_port;
+	bool have_creating_net_ref;
+};
+
+int device_init(void);
+void device_uninit(void);
+
+#endif /* _WG_DEVICE_H */
diff --git b/net/wireguard/hashtables.c b/net/wireguard/hashtables.c
new file mode 100644
index 0000000..accb2c8
--- /dev/null
+++ b/net/wireguard/hashtables.c
@@ -0,0 +1,168 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#include "hashtables.h"
+#include "peer.h"
+#include "noise.h"
+
+static inline struct hlist_head *pubkey_bucket(struct pubkey_hashtable *table, const u8 pubkey[NOISE_PUBLIC_KEY_LEN])
+{
+	/* siphash gives us a secure 64bit number based on a random key. Since the bits are
+	 * uniformly distributed, we can then mask off to get the bits we need.
+	 */
+	return &table->hashtable[siphash(pubkey, NOISE_PUBLIC_KEY_LEN, &table->key) & (HASH_SIZE(table->hashtable) - 1)];
+}
+
+void pubkey_hashtable_init(struct pubkey_hashtable *table)
+{
+	get_random_bytes(&table->key, sizeof(table->key));
+	hash_init(table->hashtable);
+	mutex_init(&table->lock);
+}
+
+void pubkey_hashtable_add(struct pubkey_hashtable *table, struct wireguard_peer *peer)
+{
+	mutex_lock(&table->lock);
+	hlist_add_head_rcu(&peer->pubkey_hash, pubkey_bucket(table, peer->handshake.remote_static));
+	mutex_unlock(&table->lock);
+}
+
+void pubkey_hashtable_remove(struct pubkey_hashtable *table, struct wireguard_peer *peer)
+{
+	mutex_lock(&table->lock);
+	hlist_del_init_rcu(&peer->pubkey_hash);
+	mutex_unlock(&table->lock);
+}
+
+/* Returns a strong reference to a peer */
+struct wireguard_peer *pubkey_hashtable_lookup(struct pubkey_hashtable *table, const u8 pubkey[NOISE_PUBLIC_KEY_LEN])
+{
+	struct wireguard_peer *iter_peer, *peer = NULL;
+
+	rcu_read_lock_bh();
+	hlist_for_each_entry_rcu_bh(iter_peer, pubkey_bucket(table, pubkey), pubkey_hash) {
+		if (!memcmp(pubkey, iter_peer->handshake.remote_static, NOISE_PUBLIC_KEY_LEN)) {
+			peer = iter_peer;
+			break;
+		}
+	}
+	peer = peer_get(peer);
+	rcu_read_unlock_bh();
+	return peer;
+}
+
+static inline struct hlist_head *index_bucket(struct index_hashtable *table, const __le32 index)
+{
+	/* Since the indices are random and thus all bits are uniformly distributed,
+	 * we can find its bucket simply by masking.
+	 */
+	return &table->hashtable[(__force u32)index & (HASH_SIZE(table->hashtable) - 1)];
+}
+
+void index_hashtable_init(struct index_hashtable *table)
+{
+	hash_init(table->hashtable);
+	spin_lock_init(&table->lock);
+}
+
+/* At the moment, we limit ourselves to 2^20 total peers, which generally might amount to 2^20*3
+ * items in this hashtable. The algorithm below works by picking a random number and testing it.
+ * We can see that these limits mean we usually succeed pretty quickly:
+ *
+ * >>> def calculation(tries, size):
+ * ...     return (size / 2**32)**(tries - 1) *  (1 - (size / 2**32))
+ * ...
+ * >>> calculation(1, 2**20 * 3)
+ * 0.999267578125
+ * >>> calculation(2, 2**20 * 3)
+ * 0.0007318854331970215
+ * >>> calculation(3, 2**20 * 3)
+ * 5.360489012673497e-07
+ * >>> calculation(4, 2**20 * 3)
+ * 3.9261394135792216e-10
+ *
+ * At the moment, we don't do any masking, so this algorithm isn't exactly constant time in
+ * either the random guessing or in the hash list lookup. We could require a minimum of 3
+ * tries, which would successfully mask the guessing. TODO: this would not, however, help
+ * with the growing hash lengths.
+ */
+
+__le32 index_hashtable_insert(struct index_hashtable *table, struct index_hashtable_entry *entry)
+{
+	struct index_hashtable_entry *existing_entry;
+
+	spin_lock_bh(&table->lock);
+	hlist_del_init_rcu(&entry->index_hash);
+	spin_unlock_bh(&table->lock);
+
+	rcu_read_lock_bh();
+
+search_unused_slot:
+	/* First we try to find an unused slot, randomly, while unlocked. */
+	entry->index = (__force __le32)get_random_u32();
+	hlist_for_each_entry_rcu_bh(existing_entry, index_bucket(table, entry->index), index_hash) {
+		if (existing_entry->index == entry->index)
+			goto search_unused_slot; /* If it's already in use, we continue searching. */
+	}
+
+	/* Once we've found an unused slot, we lock it, and then double-check
+	 * that nobody else stole it from us.
+	 */
+	spin_lock_bh(&table->lock);
+	hlist_for_each_entry_rcu_bh(existing_entry, index_bucket(table, entry->index), index_hash) {
+		if (existing_entry->index == entry->index) {
+			spin_unlock_bh(&table->lock);
+			goto search_unused_slot; /* If it was stolen, we start over. */
+		}
+	}
+	/* Otherwise, we know we have it exclusively (since we're locked), so we insert. */
+	hlist_add_head_rcu(&entry->index_hash, index_bucket(table, entry->index));
+	spin_unlock_bh(&table->lock);
+
+	rcu_read_unlock_bh();
+
+	return entry->index;
+}
+
+bool index_hashtable_replace(struct index_hashtable *table, struct index_hashtable_entry *old, struct index_hashtable_entry *new)
+{
+	if (unlikely(hlist_unhashed(&old->index_hash)))
+		return false;
+	spin_lock_bh(&table->lock);
+	new->index = old->index;
+	hlist_replace_rcu(&old->index_hash, &new->index_hash);
+	INIT_HLIST_NODE(&old->index_hash);
+	spin_unlock_bh(&table->lock);
+	return true;
+}
+
+void index_hashtable_remove(struct index_hashtable *table, struct index_hashtable_entry *entry)
+{
+	spin_lock_bh(&table->lock);
+	hlist_del_init_rcu(&entry->index_hash);
+	spin_unlock_bh(&table->lock);
+}
+
+/* Returns a strong reference to a entry->peer */
+struct index_hashtable_entry *index_hashtable_lookup(struct index_hashtable *table, const enum index_hashtable_type type_mask, const __le32 index)
+{
+	struct index_hashtable_entry *iter_entry, *entry = NULL;
+
+	rcu_read_lock_bh();
+	hlist_for_each_entry_rcu_bh(iter_entry, index_bucket(table, index), index_hash) {
+		if (iter_entry->index == index) {
+			if (likely(iter_entry->type & type_mask))
+				entry = iter_entry;
+			break;
+		}
+	}
+	if (likely(entry)) {
+		entry->peer = peer_get(entry->peer);
+		if (unlikely(!entry->peer))
+			entry = NULL;
+	}
+	rcu_read_unlock_bh();
+	return entry;
+}
diff --git b/net/wireguard/hashtables.h b/net/wireguard/hashtables.h
new file mode 100644
index 0000000..aef1493
--- /dev/null
+++ b/net/wireguard/hashtables.h
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#ifndef _WG_HASHTABLES_H
+#define _WG_HASHTABLES_H
+
+#include "messages.h"
+
+#include <linux/hashtable.h>
+#include <linux/mutex.h>
+#include <linux/siphash.h>
+
+struct wireguard_peer;
+
+struct pubkey_hashtable {
+	/* TODO: move to rhashtable */
+	DECLARE_HASHTABLE(hashtable, 11);
+	siphash_key_t key;
+	struct mutex lock;
+};
+
+void pubkey_hashtable_init(struct pubkey_hashtable *table);
+void pubkey_hashtable_add(struct pubkey_hashtable *table, struct wireguard_peer *peer);
+void pubkey_hashtable_remove(struct pubkey_hashtable *table, struct wireguard_peer *peer);
+struct wireguard_peer *pubkey_hashtable_lookup(struct pubkey_hashtable *table, const u8 pubkey[NOISE_PUBLIC_KEY_LEN]);
+
+struct index_hashtable {
+	/* TODO: move to rhashtable */
+	DECLARE_HASHTABLE(hashtable, 13);
+	spinlock_t lock;
+};
+
+enum index_hashtable_type {
+	INDEX_HASHTABLE_HANDSHAKE = 1U << 0,
+	INDEX_HASHTABLE_KEYPAIR = 1U << 1
+};
+
+struct index_hashtable_entry {
+	struct wireguard_peer *peer;
+	struct hlist_node index_hash;
+	enum index_hashtable_type type;
+	__le32 index;
+};
+void index_hashtable_init(struct index_hashtable *table);
+__le32 index_hashtable_insert(struct index_hashtable *table, struct index_hashtable_entry *entry);
+bool index_hashtable_replace(struct index_hashtable *table, struct index_hashtable_entry *old, struct index_hashtable_entry *new);
+void index_hashtable_remove(struct index_hashtable *table, struct index_hashtable_entry *entry);
+struct index_hashtable_entry *index_hashtable_lookup(struct index_hashtable *table, const enum index_hashtable_type type_mask, const __le32 index);
+
+#endif /* _WG_HASHTABLES_H */
diff --git b/net/wireguard/main.c b/net/wireguard/main.c
new file mode 100644
index 0000000..c14ed4c
--- /dev/null
+++ b/net/wireguard/main.c
@@ -0,0 +1,69 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#include "version.h"
+#include "device.h"
+#include "noise.h"
+#include "queueing.h"
+#include "ratelimiter.h"
+#include "netlink.h"
+#include "crypto/chacha20poly1305.h"
+#include "crypto/blake2s.h"
+#include "crypto/curve25519.h"
+#include "uapi/wireguard.h"
+
+#include <linux/version.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/genetlink.h>
+#include <net/rtnetlink.h>
+
+static int __init mod_init(void)
+{
+	int ret;
+
+	chacha20poly1305_fpu_init();
+	blake2s_fpu_init();
+	curve25519_fpu_init();
+#ifdef DEBUG
+	if (!allowedips_selftest() || !packet_counter_selftest() || !curve25519_selftest() || !chacha20poly1305_selftest() || !blake2s_selftest() || !ratelimiter_selftest())
+		return -ENOTRECOVERABLE;
+#endif
+	noise_init();
+
+	ret = device_init();
+	if (ret < 0)
+		goto err_packet;
+
+	ret = genetlink_init();
+	if (ret < 0)
+		goto err_netlink;
+
+	pr_info("WireGuard " WIREGUARD_VERSION " loaded. See www.wireguard.com for information.\n");
+	pr_info("Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.\n");
+
+	return 0;
+
+err_netlink:
+	device_uninit();
+err_packet:
+	return ret;
+}
+
+static void __exit mod_exit(void)
+{
+	genetlink_uninit();
+	device_uninit();
+	pr_debug("WireGuard unloaded\n");
+}
+
+module_init(mod_init);
+module_exit(mod_exit);
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("Fast, secure, and modern VPN tunnel");
+MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>");
+MODULE_VERSION(WIREGUARD_VERSION);
+MODULE_ALIAS_RTNL_LINK(KBUILD_MODNAME);
+MODULE_ALIAS_GENL_FAMILY(WG_GENL_NAME);
diff --git b/net/wireguard/messages.h b/net/wireguard/messages.h
new file mode 100644
index 0000000..6a183d2
--- /dev/null
+++ b/net/wireguard/messages.h
@@ -0,0 +1,129 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ *
+ * See doc/protocol.md for more info
+ */
+
+#ifndef _WG_MESSAGES_H
+#define _WG_MESSAGES_H
+
+#include "crypto/curve25519.h"
+#include "crypto/chacha20poly1305.h"
+#include "crypto/blake2s.h"
+
+#include <linux/kernel.h>
+#include <linux/param.h>
+#include <linux/skbuff.h>
+
+enum noise_lengths {
+	NOISE_PUBLIC_KEY_LEN = CURVE25519_POINT_SIZE,
+	NOISE_SYMMETRIC_KEY_LEN = CHACHA20POLY1305_KEYLEN,
+	NOISE_TIMESTAMP_LEN = sizeof(u64) + sizeof(u32),
+	NOISE_AUTHTAG_LEN = CHACHA20POLY1305_AUTHTAGLEN,
+	NOISE_HASH_LEN = BLAKE2S_OUTBYTES
+};
+
+#define noise_encrypted_len(plain_len) (plain_len + NOISE_AUTHTAG_LEN)
+
+enum cookie_values {
+	COOKIE_SECRET_MAX_AGE = 2 * 60 * HZ,
+	COOKIE_SECRET_LATENCY = 5 * HZ,
+	COOKIE_NONCE_LEN = XCHACHA20POLY1305_NONCELEN,
+	COOKIE_LEN = 16
+};
+
+enum counter_values {
+	COUNTER_BITS_TOTAL = 2048,
+	COUNTER_REDUNDANT_BITS = BITS_PER_LONG,
+	COUNTER_WINDOW_SIZE = COUNTER_BITS_TOTAL - COUNTER_REDUNDANT_BITS
+};
+
+enum limits {
+	REKEY_AFTER_MESSAGES = U64_MAX - 0xffff,
+	REJECT_AFTER_MESSAGES = U64_MAX - COUNTER_WINDOW_SIZE - 1,
+	REKEY_TIMEOUT = 5 * HZ,
+	REKEY_TIMEOUT_JITTER_MAX = HZ / 3,
+	REKEY_AFTER_TIME = 120 * HZ,
+	REJECT_AFTER_TIME = 180 * HZ,
+	INITIATIONS_PER_SECOND = HZ / 50,
+	MAX_PEERS_PER_DEVICE = 1U << 20,
+	KEEPALIVE_TIMEOUT = 10 * HZ,
+	MAX_TIMER_HANDSHAKES = (90 * HZ) / REKEY_TIMEOUT,
+	MAX_QUEUED_INCOMING_HANDSHAKES = 4096, /* TODO: replace this with DQL */
+	MAX_STAGED_PACKETS = 128,
+	MAX_QUEUED_PACKETS = 1024 /* TODO: replace this with DQL */
+};
+
+enum message_type {
+	MESSAGE_INVALID = 0,
+	MESSAGE_HANDSHAKE_INITIATION = 1,
+	MESSAGE_HANDSHAKE_RESPONSE = 2,
+	MESSAGE_HANDSHAKE_COOKIE = 3,
+	MESSAGE_DATA = 4,
+	MESSAGE_TOTAL = 5
+};
+
+struct message_header {
+	/* The actual layout of this that we want is:
+	 * u8 type
+	 * u8 reserved_zero[3]
+	 *
+	 * But it turns out that by encoding this as little endian,
+	 * we achieve the same thing, and it makes checking faster.
+	 */
+	__le32 type;
+};
+
+struct message_macs {
+	u8 mac1[COOKIE_LEN];
+	u8 mac2[COOKIE_LEN];
+};
+
+struct message_handshake_initiation {
+	struct message_header header;
+	__le32 sender_index;
+	u8 unencrypted_ephemeral[NOISE_PUBLIC_KEY_LEN];
+	u8 encrypted_static[noise_encrypted_len(NOISE_PUBLIC_KEY_LEN)];
+	u8 encrypted_timestamp[noise_encrypted_len(NOISE_TIMESTAMP_LEN)];
+	struct message_macs macs;
+};
+
+struct message_handshake_response {
+	struct message_header header;
+	__le32 sender_index;
+	__le32 receiver_index;
+	u8 unencrypted_ephemeral[NOISE_PUBLIC_KEY_LEN];
+	u8 encrypted_nothing[noise_encrypted_len(0)];
+	struct message_macs macs;
+};
+
+struct message_handshake_cookie {
+	struct message_header header;
+	__le32 receiver_index;
+	u8 nonce[COOKIE_NONCE_LEN];
+	u8 encrypted_cookie[noise_encrypted_len(COOKIE_LEN)];
+};
+
+struct message_data {
+	struct message_header header;
+	__le32 key_idx;
+	__le64 counter;
+	u8 encrypted_data[];
+};
+
+#define message_data_len(plain_len) (noise_encrypted_len(plain_len) + sizeof(struct message_data))
+
+enum message_alignments {
+	MESSAGE_PADDING_MULTIPLE = 16,
+	MESSAGE_MINIMUM_LENGTH = message_data_len(0)
+};
+
+#define SKB_HEADER_LEN (max(sizeof(struct iphdr), sizeof(struct ipv6hdr)) + sizeof(struct udphdr) + NET_SKB_PAD)
+#define DATA_PACKET_HEAD_ROOM ALIGN(sizeof(struct message_data) + SKB_HEADER_LEN, 4)
+
+enum {
+	HANDSHAKE_DSCP = 0x88 /* AF41, plus 00 ECN */
+};
+
+#endif /* _WG_MESSAGES_H */
diff --git b/net/wireguard/netlink.c b/net/wireguard/netlink.c
new file mode 100644
index 0000000..6abdab0
--- /dev/null
+++ b/net/wireguard/netlink.c
@@ -0,0 +1,512 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#include "netlink.h"
+#include "device.h"
+#include "peer.h"
+#include "socket.h"
+#include "queueing.h"
+#include "messages.h"
+#include "uapi/wireguard.h"
+#include <linux/if.h>
+#include <net/genetlink.h>
+#include <net/sock.h>
+
+static struct genl_family genl_family;
+
+static const struct nla_policy device_policy[WGDEVICE_A_MAX + 1] = {
+	[WGDEVICE_A_IFINDEX]	= { .type = NLA_U32 },
+	[WGDEVICE_A_IFNAME]	= { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
+	[WGDEVICE_A_PRIVATE_KEY]= { .len = NOISE_PUBLIC_KEY_LEN },
+	[WGDEVICE_A_PUBLIC_KEY]	= { .len = NOISE_PUBLIC_KEY_LEN },
+	[WGDEVICE_A_FLAGS]	= { .type = NLA_U32 },
+	[WGDEVICE_A_LISTEN_PORT]= { .type = NLA_U16 },
+	[WGDEVICE_A_FWMARK]	= { .type = NLA_U32 },
+	[WGDEVICE_A_PEERS]	= { .type = NLA_NESTED }
+};
+
+static const struct nla_policy peer_policy[WGPEER_A_MAX + 1] = {
+	[WGPEER_A_PUBLIC_KEY]			= { .len = NOISE_PUBLIC_KEY_LEN },
+	[WGPEER_A_PRESHARED_KEY]		= { .len = NOISE_SYMMETRIC_KEY_LEN },
+	[WGPEER_A_FLAGS]			= { .type = NLA_U32 },
+	[WGPEER_A_ENDPOINT]			= { .len = sizeof(struct sockaddr) },
+	[WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL]= { .type = NLA_U16 },
+	[WGPEER_A_LAST_HANDSHAKE_TIME]		= { .len = sizeof(struct timespec) },
+	[WGPEER_A_RX_BYTES]			= { .type = NLA_U64 },
+	[WGPEER_A_TX_BYTES]			= { .type = NLA_U64 },
+	[WGPEER_A_ALLOWEDIPS]			= { .type = NLA_NESTED }
+};
+
+static const struct nla_policy allowedip_policy[WGALLOWEDIP_A_MAX + 1] = {
+	[WGALLOWEDIP_A_FAMILY]		= { .type = NLA_U16 },
+	[WGALLOWEDIP_A_IPADDR]		= { .len = sizeof(struct in_addr) },
+	[WGALLOWEDIP_A_CIDR_MASK]	= { .type = NLA_U8 }
+};
+
+static struct wireguard_device *lookup_interface(struct nlattr **attrs, struct sk_buff *skb)
+{
+	struct net_device *dev = NULL;
+
+	if (!attrs[WGDEVICE_A_IFINDEX] == !attrs[WGDEVICE_A_IFNAME])
+		return ERR_PTR(-EBADR);
+	if (attrs[WGDEVICE_A_IFINDEX])
+		dev = dev_get_by_index(sock_net(skb->sk), nla_get_u32(attrs[WGDEVICE_A_IFINDEX]));
+	else if (attrs[WGDEVICE_A_IFNAME])
+		dev = dev_get_by_name(sock_net(skb->sk), nla_data(attrs[WGDEVICE_A_IFNAME]));
+	if (!dev)
+		return ERR_PTR(-ENODEV);
+	if (!dev->rtnl_link_ops || !dev->rtnl_link_ops->kind || strcmp(dev->rtnl_link_ops->kind, KBUILD_MODNAME)) {
+		dev_put(dev);
+		return ERR_PTR(-EOPNOTSUPP);
+	}
+	return netdev_priv(dev);
+}
+
+struct allowedips_ctx {
+	struct sk_buff *skb;
+	unsigned int i;
+};
+
+static int get_allowedips(void *ctx, const u8 *ip, u8 cidr, int family)
+{
+	struct nlattr *allowedip_nest;
+	struct allowedips_ctx *actx = ctx;
+
+	allowedip_nest = nla_nest_start(actx->skb, actx->i++);
+	if (!allowedip_nest)
+		return -EMSGSIZE;
+
+	if (nla_put_u8(actx->skb, WGALLOWEDIP_A_CIDR_MASK, cidr) || nla_put_u16(actx->skb, WGALLOWEDIP_A_FAMILY, family) ||
+	    nla_put(actx->skb, WGALLOWEDIP_A_IPADDR, family == AF_INET6 ? sizeof(struct in6_addr) : sizeof(struct in_addr), ip)) {
+		nla_nest_cancel(actx->skb, allowedip_nest);
+		return -EMSGSIZE;
+	}
+
+	nla_nest_end(actx->skb, allowedip_nest);
+	return 0;
+}
+
+static int get_peer(struct wireguard_peer *peer, unsigned int index, struct allowedips_cursor *rt_cursor, struct sk_buff *skb)
+{
+	struct allowedips_ctx ctx = { .skb = skb };
+	struct nlattr *allowedips_nest, *peer_nest = nla_nest_start(skb, index);
+	bool fail;
+
+	if (!peer_nest)
+		return -EMSGSIZE;
+
+	down_read(&peer->handshake.lock);
+	fail = nla_put(skb, WGPEER_A_PUBLIC_KEY, NOISE_PUBLIC_KEY_LEN, peer->handshake.remote_static);
+	up_read(&peer->handshake.lock);
+	if (fail)
+		goto err;
+
+	if (!rt_cursor->seq) {
+		down_read(&peer->handshake.lock);
+		fail = nla_put(skb, WGPEER_A_PRESHARED_KEY, NOISE_SYMMETRIC_KEY_LEN, peer->handshake.preshared_key);
+		up_read(&peer->handshake.lock);
+		if (fail)
+			goto err;
+
+		if (nla_put(skb, WGPEER_A_LAST_HANDSHAKE_TIME, sizeof(struct timespec), &peer->walltime_last_handshake) || nla_put_u16(skb, WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL, peer->persistent_keepalive_interval / HZ) ||
+				nla_put_u64_64bit(skb, WGPEER_A_TX_BYTES, peer->tx_bytes, WGPEER_A_UNSPEC) || nla_put_u64_64bit(skb, WGPEER_A_RX_BYTES, peer->rx_bytes, WGPEER_A_UNSPEC))
+			goto err;
+
+		read_lock_bh(&peer->endpoint_lock);
+		if (peer->endpoint.addr.sa_family == AF_INET)
+			fail = nla_put(skb, WGPEER_A_ENDPOINT, sizeof(struct sockaddr_in), &peer->endpoint.addr4);
+		else if (peer->endpoint.addr.sa_family == AF_INET6)
+			fail = nla_put(skb, WGPEER_A_ENDPOINT, sizeof(struct sockaddr_in6), &peer->endpoint.addr6);
+		read_unlock_bh(&peer->endpoint_lock);
+		if (fail)
+			goto err;
+	}
+
+	allowedips_nest = nla_nest_start(skb, WGPEER_A_ALLOWEDIPS);
+	if (!allowedips_nest)
+		goto err;
+	if (allowedips_walk_by_peer(&peer->device->peer_allowedips, rt_cursor, peer, get_allowedips, &ctx, &peer->device->device_update_lock)) {
+		nla_nest_end(skb, allowedips_nest);
+		nla_nest_end(skb, peer_nest);
+		return -EMSGSIZE;
+	}
+	memset(rt_cursor, 0, sizeof(*rt_cursor));
+	nla_nest_end(skb, allowedips_nest);
+	nla_nest_end(skb, peer_nest);
+	return 0;
+err:
+	nla_nest_cancel(skb, peer_nest);
+	return -EMSGSIZE;
+}
+
+static int get_device_start(struct netlink_callback *cb)
+{
+	struct wireguard_device *wg;
+	struct nlattr **attrs = genl_family_attrbuf(&genl_family);
+	int ret = nlmsg_parse(cb->nlh, GENL_HDRLEN + genl_family.hdrsize, attrs, genl_family.maxattr, device_policy, NULL);
+
+	if (ret < 0)
+		return ret;
+	cb->args[2] = (long)kzalloc(sizeof(struct allowedips_cursor), GFP_KERNEL);
+	if (!cb->args[2])
+		return -ENOMEM;
+	wg = lookup_interface(attrs, cb->skb);
+	if (IS_ERR(wg)) {
+		kfree((void *)cb->args[2]);
+		cb->args[2] = 0;
+		return PTR_ERR(wg);
+	}
+	cb->args[0] = (long)wg;
+	return 0;
+}
+
+static int get_device_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct wireguard_device *wg = (struct wireguard_device *)cb->args[0];
+	struct wireguard_peer *peer, *next_peer_cursor = NULL, *last_peer_cursor = (struct wireguard_peer *)cb->args[1];
+	struct allowedips_cursor *rt_cursor = (struct allowedips_cursor *)cb->args[2];
+	unsigned int peer_idx = 0;
+	struct nlattr *peers_nest;
+	bool done = true;
+	void *hdr;
+	int ret = -EMSGSIZE;
+
+	rtnl_lock();
+	mutex_lock(&wg->device_update_lock);
+	cb->seq = wg->device_update_gen;
+
+	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, &genl_family, NLM_F_MULTI, WG_CMD_GET_DEVICE);
+	if (!hdr)
+		goto out;
+	genl_dump_check_consistent(cb, hdr);
+
+	if (!last_peer_cursor) {
+		if (nla_put_u16(skb, WGDEVICE_A_LISTEN_PORT, wg->incoming_port) || nla_put_u32(skb, WGDEVICE_A_FWMARK, wg->fwmark) || nla_put_u32(skb, WGDEVICE_A_IFINDEX, wg->dev->ifindex) || nla_put_string(skb, WGDEVICE_A_IFNAME, wg->dev->name))
+			goto out;
+
+		down_read(&wg->static_identity.lock);
+		if (wg->static_identity.has_identity) {
+			if (nla_put(skb, WGDEVICE_A_PRIVATE_KEY, NOISE_PUBLIC_KEY_LEN, wg->static_identity.static_private) || nla_put(skb, WGDEVICE_A_PUBLIC_KEY, NOISE_PUBLIC_KEY_LEN, wg->static_identity.static_public)) {
+				up_read(&wg->static_identity.lock);
+				goto out;
+			}
+		}
+		up_read(&wg->static_identity.lock);
+	}
+
+	peers_nest = nla_nest_start(skb, WGDEVICE_A_PEERS);
+	if (!peers_nest)
+		goto out;
+	ret = 0;
+	/* If the last cursor was removed via list_del_init in peer_remove, then we just treat
+	 * this the same as there being no more peers left. The reason is that seq_nr should
+	 * indicate to userspace that this isn't a coherent dump anyway, so they'll try again.
+	 */
+	if (list_empty(&wg->peer_list) || (last_peer_cursor && list_empty(&last_peer_cursor->peer_list))) {
+		nla_nest_cancel(skb, peers_nest);
+		goto out;
+	}
+	lockdep_assert_held(&wg->device_update_lock);
+	peer = list_prepare_entry(last_peer_cursor, &wg->peer_list, peer_list);
+	list_for_each_entry_continue(peer, &wg->peer_list, peer_list) {
+		if (get_peer(peer, peer_idx++, rt_cursor, skb)) {
+			done = false;
+			break;
+		}
+		next_peer_cursor = peer;
+	}
+	nla_nest_end(skb, peers_nest);
+
+out:
+	peer_put(last_peer_cursor);
+	if (!ret && !done)
+		next_peer_cursor = peer_rcu_get(next_peer_cursor);
+	mutex_unlock(&wg->device_update_lock);
+	rtnl_unlock();
+
+	if (ret) {
+		genlmsg_cancel(skb, hdr);
+		return ret;
+	}
+	genlmsg_end(skb, hdr);
+	if (done) {
+		cb->args[1] = 0;
+		return 0;
+	}
+	cb->args[1] = (long)next_peer_cursor;
+	return skb->len;
+
+	/* At this point, we can't really deal ourselves with safely zeroing out
+	 * the private key material after usage. This will need an additional API
+	 * in the kernel for marking skbs as zero_on_free.
+	 */
+}
+
+static int get_device_done(struct netlink_callback *cb)
+{
+	struct wireguard_device *wg = (struct wireguard_device *)cb->args[0];
+	struct wireguard_peer *peer = (struct wireguard_peer *)cb->args[1];
+	struct allowedips_cursor *rt_cursor = (struct allowedips_cursor *)cb->args[2];
+
+	if (wg)
+		dev_put(wg->dev);
+	kfree(rt_cursor);
+	peer_put(peer);
+	return 0;
+}
+
+static int set_port(struct wireguard_device *wg, u16 port)
+{
+	struct wireguard_peer *peer;
+
+	if (wg->incoming_port == port)
+		return 0;
+	list_for_each_entry(peer, &wg->peer_list, peer_list)
+		socket_clear_peer_endpoint_src(peer);
+	if (!netif_running(wg->dev)) {
+		wg->incoming_port = port;
+		return 0;
+	}
+	return socket_init(wg, port);
+}
+
+static int set_allowedip(struct wireguard_peer *peer, struct nlattr **attrs)
+{
+	int ret = -EINVAL;
+	u16 family;
+	u8 cidr;
+
+	if (!attrs[WGALLOWEDIP_A_FAMILY] || !attrs[WGALLOWEDIP_A_IPADDR] || !attrs[WGALLOWEDIP_A_CIDR_MASK])
+		return ret;
+	family = nla_get_u16(attrs[WGALLOWEDIP_A_FAMILY]);
+	cidr = nla_get_u8(attrs[WGALLOWEDIP_A_CIDR_MASK]);
+
+	if (family == AF_INET && cidr <= 32 && nla_len(attrs[WGALLOWEDIP_A_IPADDR]) == sizeof(struct in_addr))
+		ret = allowedips_insert_v4(&peer->device->peer_allowedips, nla_data(attrs[WGALLOWEDIP_A_IPADDR]), cidr, peer, &peer->device->device_update_lock);
+	else if (family == AF_INET6 && cidr <= 128 && nla_len(attrs[WGALLOWEDIP_A_IPADDR]) == sizeof(struct in6_addr))
+		ret = allowedips_insert_v6(&peer->device->peer_allowedips, nla_data(attrs[WGALLOWEDIP_A_IPADDR]), cidr, peer, &peer->device->device_update_lock);
+
+	return ret;
+}
+
+static int set_peer(struct wireguard_device *wg, struct nlattr **attrs)
+{
+	int ret;
+	u32 flags = 0;
+	struct wireguard_peer *peer = NULL;
+	u8 *public_key = NULL, *preshared_key = NULL;
+
+	ret = -EINVAL;
+	if (attrs[WGPEER_A_PUBLIC_KEY] && nla_len(attrs[WGPEER_A_PUBLIC_KEY]) == NOISE_PUBLIC_KEY_LEN)
+		public_key = nla_data(attrs[WGPEER_A_PUBLIC_KEY]);
+	else
+		goto out;
+	if (attrs[WGPEER_A_PRESHARED_KEY] && nla_len(attrs[WGPEER_A_PRESHARED_KEY]) == NOISE_SYMMETRIC_KEY_LEN)
+		preshared_key = nla_data(attrs[WGPEER_A_PRESHARED_KEY]);
+	if (attrs[WGPEER_A_FLAGS])
+		flags = nla_get_u32(attrs[WGPEER_A_FLAGS]);
+
+	peer = pubkey_hashtable_lookup(&wg->peer_hashtable, nla_data(attrs[WGPEER_A_PUBLIC_KEY]));
+	if (!peer) { /* Peer doesn't exist yet. Add a new one. */
+		ret = -ENODEV;
+		if (flags & WGPEER_F_REMOVE_ME)
+			goto out; /* Tried to remove a non-existing peer. */
+
+		down_read(&wg->static_identity.lock);
+		if (wg->static_identity.has_identity && !memcmp(nla_data(attrs[WGPEER_A_PUBLIC_KEY]), wg->static_identity.static_public, NOISE_PUBLIC_KEY_LEN)) {
+			/* We silently ignore peers that have the same public key as the device. The reason we do it silently
+			 * is that we'd like for people to be able to reuse the same set of API calls across peers.
+			 */
+			up_read(&wg->static_identity.lock);
+			ret = 0;
+			goto out;
+		}
+		up_read(&wg->static_identity.lock);
+
+		ret = -ENOMEM;
+		peer = peer_rcu_get(peer_create(wg, public_key, preshared_key));
+		if (!peer)
+			goto out;
+	}
+
+	ret = 0;
+	if (flags & WGPEER_F_REMOVE_ME) {
+		peer_remove(peer);
+		goto out;
+	}
+
+	if (preshared_key) {
+		down_write(&peer->handshake.lock);
+		memcpy(&peer->handshake.preshared_key, preshared_key, NOISE_SYMMETRIC_KEY_LEN);
+		up_write(&peer->handshake.lock);
+	}
+
+	if (attrs[WGPEER_A_ENDPOINT]) {
+		struct sockaddr *addr = nla_data(attrs[WGPEER_A_ENDPOINT]);
+		size_t len = nla_len(attrs[WGPEER_A_ENDPOINT]);
+
+		if ((len == sizeof(struct sockaddr_in) && addr->sa_family == AF_INET) || (len == sizeof(struct sockaddr_in6) && addr->sa_family == AF_INET6)) {
+			struct endpoint endpoint = { { { 0 } } };
+
+			memcpy(&endpoint.addr, addr, len);
+			socket_set_peer_endpoint(peer, &endpoint);
+		}
+	}
+
+	if (flags & WGPEER_F_REPLACE_ALLOWEDIPS)
+		allowedips_remove_by_peer(&wg->peer_allowedips, peer, &wg->device_update_lock);
+
+	if (attrs[WGPEER_A_ALLOWEDIPS]) {
+		int rem;
+		struct nlattr *attr, *allowedip[WGALLOWEDIP_A_MAX + 1];
+
+		nla_for_each_nested(attr, attrs[WGPEER_A_ALLOWEDIPS], rem) {
+			ret = nla_parse_nested(allowedip, WGALLOWEDIP_A_MAX, attr, allowedip_policy, NULL);
+			if (ret < 0)
+				goto out;
+			ret = set_allowedip(peer, allowedip);
+			if (ret < 0)
+				goto out;
+		}
+	}
+
+	if (attrs[WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL]) {
+		const u16 persistent_keepalive_interval = nla_get_u16(attrs[WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL]);
+		const bool send_keepalive = !peer->persistent_keepalive_interval && persistent_keepalive_interval && netif_running(wg->dev);
+
+		peer->persistent_keepalive_interval = (unsigned long)persistent_keepalive_interval * HZ;
+		if (send_keepalive)
+			packet_send_keepalive(peer);
+	}
+
+	if (netif_running(wg->dev))
+		packet_send_staged_packets(peer);
+
+out:
+	peer_put(peer);
+	if (attrs[WGPEER_A_PRESHARED_KEY])
+		memzero_explicit(nla_data(attrs[WGPEER_A_PRESHARED_KEY]), nla_len(attrs[WGPEER_A_PRESHARED_KEY]));
+	return ret;
+}
+
+static int set_device(struct sk_buff *skb, struct genl_info *info)
+{
+	int ret;
+	struct wireguard_device *wg = lookup_interface(info->attrs, skb);
+
+	if (IS_ERR(wg)) {
+		ret = PTR_ERR(wg);
+		goto out_nodev;
+	}
+
+	rtnl_lock();
+	mutex_lock(&wg->device_update_lock);
+	++wg->device_update_gen;
+
+	if (info->attrs[WGDEVICE_A_FWMARK]) {
+		struct wireguard_peer *peer;
+
+		wg->fwmark = nla_get_u32(info->attrs[WGDEVICE_A_FWMARK]);
+		list_for_each_entry(peer, &wg->peer_list, peer_list)
+			socket_clear_peer_endpoint_src(peer);
+	}
+
+	if (info->attrs[WGDEVICE_A_LISTEN_PORT]) {
+		ret = set_port(wg, nla_get_u16(info->attrs[WGDEVICE_A_LISTEN_PORT]));
+		if (ret)
+			goto out;
+	}
+
+	if (info->attrs[WGDEVICE_A_FLAGS] && nla_get_u32(info->attrs[WGDEVICE_A_FLAGS]) & WGDEVICE_F_REPLACE_PEERS)
+		peer_remove_all(wg);
+
+	if (info->attrs[WGDEVICE_A_PRIVATE_KEY] && nla_len(info->attrs[WGDEVICE_A_PRIVATE_KEY]) == NOISE_PUBLIC_KEY_LEN) {
+		struct wireguard_peer *peer, *temp;
+		u8 public_key[NOISE_PUBLIC_KEY_LEN], *private_key = nla_data(info->attrs[WGDEVICE_A_PRIVATE_KEY]);
+
+		/* We remove before setting, to prevent race, which means doing two 25519-genpub ops. */
+		if (curve25519_generate_public(public_key, private_key)) {
+			peer = pubkey_hashtable_lookup(&wg->peer_hashtable, public_key);
+			if (peer) {
+				peer_put(peer);
+				peer_remove(peer);
+			}
+		}
+
+		noise_set_static_identity_private_key(&wg->static_identity, private_key);
+		list_for_each_entry_safe(peer, temp, &wg->peer_list, peer_list) {
+			if (!noise_precompute_static_static(peer))
+				peer_remove(peer);
+		}
+		cookie_checker_precompute_device_keys(&wg->cookie_checker);
+	}
+
+	if (info->attrs[WGDEVICE_A_PEERS]) {
+		int rem;
+		struct nlattr *attr, *peer[WGPEER_A_MAX + 1];
+
+		nla_for_each_nested(attr, info->attrs[WGDEVICE_A_PEERS], rem) {
+			ret = nla_parse_nested(peer, WGPEER_A_MAX, attr, peer_policy, NULL);
+			if (ret < 0)
+				goto out;
+			ret = set_peer(wg, peer);
+			if (ret < 0)
+				goto out;
+		}
+	}
+	ret = 0;
+
+out:
+	mutex_unlock(&wg->device_update_lock);
+	rtnl_unlock();
+	dev_put(wg->dev);
+out_nodev:
+	if (info->attrs[WGDEVICE_A_PRIVATE_KEY])
+		memzero_explicit(nla_data(info->attrs[WGDEVICE_A_PRIVATE_KEY]), nla_len(info->attrs[WGDEVICE_A_PRIVATE_KEY]));
+	return ret;
+}
+
+static const struct genl_ops genl_ops[] = {
+	{
+		.cmd = WG_CMD_GET_DEVICE,
+#ifndef COMPAT_CANNOT_USE_NETLINK_START
+		.start = get_device_start,
+#endif
+		.dumpit = get_device_dump,
+		.done = get_device_done,
+		.policy = device_policy,
+		.flags = GENL_UNS_ADMIN_PERM
+	}, {
+		.cmd = WG_CMD_SET_DEVICE,
+		.doit = set_device,
+		.policy = device_policy,
+		.flags = GENL_UNS_ADMIN_PERM
+	}
+};
+
+static struct genl_family genl_family
+#ifndef COMPAT_CANNOT_USE_GENL_NOPS
+__ro_after_init = {
+	.ops = genl_ops,
+	.n_ops = ARRAY_SIZE(genl_ops),
+#else
+= {
+#endif
+	.name = WG_GENL_NAME,
+	.version = WG_GENL_VERSION,
+	.maxattr = WGDEVICE_A_MAX,
+	.module = THIS_MODULE,
+	.netnsok = true
+};
+
+int __init genetlink_init(void)
+{
+	return genl_register_family(&genl_family);
+}
+
+void __exit genetlink_uninit(void)
+{
+	genl_unregister_family(&genl_family);
+}
diff --git b/net/wireguard/netlink.h b/net/wireguard/netlink.h
new file mode 100644
index 0000000..e3982bb
--- /dev/null
+++ b/net/wireguard/netlink.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#ifndef _WG_NETLINK_H
+#define _WG_NETLINK_H
+
+int genetlink_init(void);
+void genetlink_uninit(void);
+
+#endif /* _WG_NETLINK_H */
diff --git b/net/wireguard/noise.c b/net/wireguard/noise.c
new file mode 100644
index 0000000..fa0902b
--- /dev/null
+++ b/net/wireguard/noise.c
@@ -0,0 +1,635 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#include "noise.h"
+#include "device.h"
+#include "peer.h"
+#include "messages.h"
+#include "queueing.h"
+#include "hashtables.h"
+
+#include <linux/rcupdate.h>
+#include <linux/slab.h>
+#include <linux/bitmap.h>
+#include <linux/scatterlist.h>
+#include <linux/highmem.h>
+#include <crypto/algapi.h>
+
+/* This implements Noise_IKpsk2:
+ *
+ * <- s
+ * ******
+ * -> e, es, s, ss, {t}
+ * <- e, ee, se, psk, {}
+ */
+
+static const u8 handshake_name[37] = "Noise_IKpsk2_25519_ChaChaPoly_BLAKE2s";
+static const u8 identifier_name[34] = "WireGuard v1 zx2c4 Jason@zx2c4.com";
+static u8 handshake_init_hash[NOISE_HASH_LEN] __ro_after_init;
+static u8 handshake_init_chaining_key[NOISE_HASH_LEN] __ro_after_init;
+static atomic64_t keypair_counter = ATOMIC64_INIT(0);
+
+void __init noise_init(void)
+{
+	struct blake2s_state blake;
+
+	blake2s(handshake_init_chaining_key, handshake_name, NULL, NOISE_HASH_LEN, sizeof(handshake_name), 0);
+	blake2s_init(&blake, NOISE_HASH_LEN);
+	blake2s_update(&blake, handshake_init_chaining_key, NOISE_HASH_LEN);
+	blake2s_update(&blake, identifier_name, sizeof(identifier_name));
+	blake2s_final(&blake, handshake_init_hash, NOISE_HASH_LEN);
+}
+
+bool noise_precompute_static_static(struct wireguard_peer *peer)
+{
+	if (peer->handshake.static_identity->has_identity)
+		return curve25519(peer->handshake.precomputed_static_static, peer->handshake.static_identity->static_private, peer->handshake.remote_static);
+	memset(peer->handshake.precomputed_static_static, 0, NOISE_PUBLIC_KEY_LEN);
+	return true;
+}
+
+bool noise_handshake_init(struct noise_handshake *handshake, struct noise_static_identity *static_identity, const u8 peer_public_key[NOISE_PUBLIC_KEY_LEN], const u8 peer_preshared_key[NOISE_SYMMETRIC_KEY_LEN], struct wireguard_peer *peer)
+{
+	memset(handshake, 0, sizeof(struct noise_handshake));
+	init_rwsem(&handshake->lock);
+	handshake->entry.type = INDEX_HASHTABLE_HANDSHAKE;
+	handshake->entry.peer = peer;
+	memcpy(handshake->remote_static, peer_public_key, NOISE_PUBLIC_KEY_LEN);
+	if (peer_preshared_key)
+		memcpy(handshake->preshared_key, peer_preshared_key, NOISE_SYMMETRIC_KEY_LEN);
+	handshake->static_identity = static_identity;
+	handshake->state = HANDSHAKE_ZEROED;
+	return noise_precompute_static_static(peer);
+}
+
+static void handshake_zero(struct noise_handshake *handshake)
+{
+	memset(&handshake->ephemeral_private, 0, NOISE_PUBLIC_KEY_LEN);
+	memset(&handshake->remote_ephemeral, 0, NOISE_PUBLIC_KEY_LEN);
+	memset(&handshake->hash, 0, NOISE_HASH_LEN);
+	memset(&handshake->chaining_key, 0, NOISE_HASH_LEN);
+	handshake->remote_index = 0;
+	handshake->state = HANDSHAKE_ZEROED;
+}
+
+void noise_handshake_clear(struct noise_handshake *handshake)
+{
+	index_hashtable_remove(&handshake->entry.peer->device->index_hashtable, &handshake->entry);
+	down_write(&handshake->lock);
+	handshake_zero(handshake);
+	up_write(&handshake->lock);
+	index_hashtable_remove(&handshake->entry.peer->device->index_hashtable, &handshake->entry);
+}
+
+static struct noise_keypair *keypair_create(struct wireguard_peer *peer)
+{
+	struct noise_keypair *keypair = kzalloc(sizeof(struct noise_keypair), GFP_KERNEL);
+
+	if (unlikely(!keypair))
+		return NULL;
+	keypair->internal_id = atomic64_inc_return(&keypair_counter);
+	keypair->entry.type = INDEX_HASHTABLE_KEYPAIR;
+	keypair->entry.peer = peer;
+	kref_init(&keypair->refcount);
+	return keypair;
+}
+
+static void keypair_free_rcu(struct rcu_head *rcu)
+{
+	struct noise_keypair *keypair = container_of(rcu, struct noise_keypair, rcu);
+
+	net_dbg_ratelimited("%s: Keypair %llu destroyed for peer %llu\n", keypair->entry.peer->device->dev->name, keypair->internal_id, keypair->entry.peer->internal_id);
+	kzfree(keypair);
+}
+
+static void keypair_free_kref(struct kref *kref)
+{
+	struct noise_keypair *keypair = container_of(kref, struct noise_keypair, refcount);
+
+	index_hashtable_remove(&keypair->entry.peer->device->index_hashtable, &keypair->entry);
+	call_rcu_bh(&keypair->rcu, keypair_free_rcu);
+}
+
+void noise_keypair_put(struct noise_keypair *keypair)
+{
+	if (unlikely(!keypair))
+		return;
+	kref_put(&keypair->refcount, keypair_free_kref);
+}
+
+struct noise_keypair *noise_keypair_get(struct noise_keypair *keypair)
+{
+	RCU_LOCKDEP_WARN(!rcu_read_lock_bh_held(), "Taking noise keypair reference without holding the RCU BH read lock");
+	if (unlikely(!keypair || !kref_get_unless_zero(&keypair->refcount)))
+		return NULL;
+	return keypair;
+}
+
+void noise_keypairs_clear(struct noise_keypairs *keypairs)
+{
+	struct noise_keypair *old;
+
+	spin_lock_bh(&keypairs->keypair_update_lock);
+	old = rcu_dereference_protected(keypairs->previous_keypair, lockdep_is_held(&keypairs->keypair_update_lock));
+	rcu_assign_pointer(keypairs->previous_keypair, NULL);
+	noise_keypair_put(old);
+	old = rcu_dereference_protected(keypairs->next_keypair, lockdep_is_held(&keypairs->keypair_update_lock));
+	rcu_assign_pointer(keypairs->next_keypair, NULL);
+	noise_keypair_put(old);
+	old = rcu_dereference_protected(keypairs->current_keypair, lockdep_is_held(&keypairs->keypair_update_lock));
+	rcu_assign_pointer(keypairs->current_keypair, NULL);
+	noise_keypair_put(old);
+	spin_unlock_bh(&keypairs->keypair_update_lock);
+}
+
+static void add_new_keypair(struct noise_keypairs *keypairs, struct noise_keypair *new_keypair)
+{
+	struct noise_keypair *previous_keypair, *next_keypair, *current_keypair;
+
+	spin_lock_bh(&keypairs->keypair_update_lock);
+	previous_keypair = rcu_dereference_protected(keypairs->previous_keypair, lockdep_is_held(&keypairs->keypair_update_lock));
+	next_keypair = rcu_dereference_protected(keypairs->next_keypair, lockdep_is_held(&keypairs->keypair_update_lock));
+	current_keypair =  rcu_dereference_protected(keypairs->current_keypair, lockdep_is_held(&keypairs->keypair_update_lock));
+	if (new_keypair->i_am_the_initiator) {
+		/* If we're the initiator, it means we've sent a handshake, and received
+		 * a confirmation response, which means this new keypair can now be used.
+		 */
+		if (next_keypair) {
+			/* If there already was a next keypair pending, we demote it to be
+			 * the previous keypair, and free the existing current.
+			 * TODO: note that this means KCI can result in this transition. It
+			 * would perhaps be more sound to always just get rid of the unused
+			 * next keypair instead of putting it in the previous slot, but this
+			 * might be a bit less robust. Something to think about and decide on.
+			 */
+			rcu_assign_pointer(keypairs->next_keypair, NULL);
+			rcu_assign_pointer(keypairs->previous_keypair, next_keypair);
+			noise_keypair_put(current_keypair);
+		} else	/* If there wasn't an existing next keypair, we replace the
+			 * previous with the current one.
+			 */
+			rcu_assign_pointer(keypairs->previous_keypair, current_keypair);
+		/* At this point we can get rid of the old previous keypair, and set up
+		 * the new keypair.
+		 */
+		noise_keypair_put(previous_keypair);
+		rcu_assign_pointer(keypairs->current_keypair, new_keypair);
+	} else {
+		/* If we're the responder, it means we can't use the new keypair until
+		 * we receive confirmation via the first data packet, so we get rid of
+		 * the existing previous one, the possibly existing next one, and slide
+		 * in the new next one.
+		 */
+		rcu_assign_pointer(keypairs->next_keypair, new_keypair);
+		noise_keypair_put(next_keypair);
+		rcu_assign_pointer(keypairs->previous_keypair, NULL);
+		noise_keypair_put(previous_keypair);
+	}
+	spin_unlock_bh(&keypairs->keypair_update_lock);
+}
+
+bool noise_received_with_keypair(struct noise_keypairs *keypairs, struct noise_keypair *received_keypair)
+{
+	bool key_is_new;
+	struct noise_keypair *old_keypair;
+
+	/* We first check without taking the spinlock. */
+	key_is_new = received_keypair == rcu_access_pointer(keypairs->next_keypair);
+	if (likely(!key_is_new))
+		return false;
+
+	spin_lock_bh(&keypairs->keypair_update_lock);
+	/* After locking, we double check that things didn't change from beneath us. */
+	if (unlikely(received_keypair != rcu_dereference_protected(keypairs->next_keypair, lockdep_is_held(&keypairs->keypair_update_lock)))) {
+		spin_unlock_bh(&keypairs->keypair_update_lock);
+		return false;
+	}
+
+	/* When we've finally received the confirmation, we slide the next
+	 * into the current, the current into the previous, and get rid of
+	 * the old previous.
+	 */
+	old_keypair = rcu_dereference_protected(keypairs->previous_keypair, lockdep_is_held(&keypairs->keypair_update_lock));
+	rcu_assign_pointer(keypairs->previous_keypair, rcu_dereference_protected(keypairs->current_keypair, lockdep_is_held(&keypairs->keypair_update_lock)));
+	noise_keypair_put(old_keypair);
+	rcu_assign_pointer(keypairs->current_keypair, received_keypair);
+	rcu_assign_pointer(keypairs->next_keypair, NULL);
+
+	spin_unlock_bh(&keypairs->keypair_update_lock);
+	return true;
+}
+
+void noise_set_static_identity_private_key(struct noise_static_identity *static_identity, const u8 private_key[NOISE_PUBLIC_KEY_LEN])
+{
+	down_write(&static_identity->lock);
+	memcpy(static_identity->static_private, private_key, NOISE_PUBLIC_KEY_LEN);
+	static_identity->has_identity = curve25519_generate_public(static_identity->static_public, private_key);
+	up_write(&static_identity->lock);
+}
+
+/* This is Hugo Krawczyk's HKDF:
+ *  - https://eprint.iacr.org/2010/264.pdf
+ *  - https://tools.ietf.org/html/rfc5869
+ */
+static void kdf(u8 *first_dst, u8 *second_dst, u8 *third_dst, const u8 *data, size_t first_len, size_t second_len, size_t third_len, size_t data_len, const u8 chaining_key[NOISE_HASH_LEN])
+{
+	u8 secret[BLAKE2S_OUTBYTES];
+	u8 output[BLAKE2S_OUTBYTES + 1];
+
+#ifdef DEBUG
+	BUG_ON(first_len > BLAKE2S_OUTBYTES || second_len > BLAKE2S_OUTBYTES || third_len > BLAKE2S_OUTBYTES || ((second_len || second_dst || third_len || third_dst) && (!first_len || !first_dst)) || ((third_len || third_dst) && (!second_len || !second_dst)));
+#endif
+
+	/* Extract entropy from data into secret */
+	blake2s_hmac(secret, data, chaining_key, BLAKE2S_OUTBYTES, data_len, NOISE_HASH_LEN);
+
+	if (!first_dst || !first_len)
+		goto out;
+
+	/* Expand first key: key = secret, data = 0x1 */
+	output[0] = 1;
+	blake2s_hmac(output, output, secret, BLAKE2S_OUTBYTES, 1, BLAKE2S_OUTBYTES);
+	memcpy(first_dst, output, first_len);
+
+	if (!second_dst || !second_len)
+		goto out;
+
+	/* Expand second key: key = secret, data = first-key || 0x2 */
+	output[BLAKE2S_OUTBYTES] = 2;
+	blake2s_hmac(output, output, secret, BLAKE2S_OUTBYTES, BLAKE2S_OUTBYTES + 1, BLAKE2S_OUTBYTES);
+	memcpy(second_dst, output, second_len);
+
+	if (!third_dst || !third_len)
+		goto out;
+
+	/* Expand third key: key = secret, data = second-key || 0x3 */
+	output[BLAKE2S_OUTBYTES] = 3;
+	blake2s_hmac(output, output, secret, BLAKE2S_OUTBYTES, BLAKE2S_OUTBYTES + 1, BLAKE2S_OUTBYTES);
+	memcpy(third_dst, output, third_len);
+
+out:
+	/* Clear sensitive data from stack */
+	memzero_explicit(secret, BLAKE2S_OUTBYTES);
+	memzero_explicit(output, BLAKE2S_OUTBYTES + 1);
+}
+
+static void symmetric_key_init(struct noise_symmetric_key *key)
+{
+	spin_lock_init(&key->counter.receive.lock);
+	atomic64_set(&key->counter.counter, 0);
+	memset(key->counter.receive.backtrack, 0, sizeof(key->counter.receive.backtrack));
+	key->birthdate = get_jiffies_64();
+	key->is_valid = true;
+}
+
+static void derive_keys(struct noise_symmetric_key *first_dst, struct noise_symmetric_key *second_dst, const u8 chaining_key[NOISE_HASH_LEN])
+{
+	kdf(first_dst->key, second_dst->key, NULL, NULL, NOISE_SYMMETRIC_KEY_LEN, NOISE_SYMMETRIC_KEY_LEN, 0, 0, chaining_key);
+	symmetric_key_init(first_dst);
+	symmetric_key_init(second_dst);
+}
+
+static bool __must_check mix_dh(u8 chaining_key[NOISE_HASH_LEN], u8 key[NOISE_SYMMETRIC_KEY_LEN], const u8 private[NOISE_PUBLIC_KEY_LEN], const u8 public[NOISE_PUBLIC_KEY_LEN])
+{
+	u8 dh_calculation[NOISE_PUBLIC_KEY_LEN];
+
+	if (unlikely(!curve25519(dh_calculation, private, public)))
+		return false;
+	kdf(chaining_key, key, NULL, dh_calculation, NOISE_HASH_LEN, NOISE_SYMMETRIC_KEY_LEN, 0, NOISE_PUBLIC_KEY_LEN, chaining_key);
+	memzero_explicit(dh_calculation, NOISE_PUBLIC_KEY_LEN);
+	return true;
+}
+
+static void mix_hash(u8 hash[NOISE_HASH_LEN], const u8 *src, size_t src_len)
+{
+	struct blake2s_state blake;
+
+	blake2s_init(&blake, NOISE_HASH_LEN);
+	blake2s_update(&blake, hash, NOISE_HASH_LEN);
+	blake2s_update(&blake, src, src_len);
+	blake2s_final(&blake, hash, NOISE_HASH_LEN);
+}
+
+static void mix_psk(u8 chaining_key[NOISE_HASH_LEN], u8 hash[NOISE_HASH_LEN], u8 key[NOISE_SYMMETRIC_KEY_LEN], const u8 psk[NOISE_SYMMETRIC_KEY_LEN])
+{
+	u8 temp_hash[NOISE_HASH_LEN];
+
+	kdf(chaining_key, temp_hash, key, psk, NOISE_HASH_LEN, NOISE_HASH_LEN, NOISE_SYMMETRIC_KEY_LEN, NOISE_SYMMETRIC_KEY_LEN, chaining_key);
+	mix_hash(hash, temp_hash, NOISE_HASH_LEN);
+	memzero_explicit(temp_hash, NOISE_HASH_LEN);
+}
+
+static void handshake_init(u8 chaining_key[NOISE_HASH_LEN], u8 hash[NOISE_HASH_LEN], const u8 remote_static[NOISE_PUBLIC_KEY_LEN])
+{
+	memcpy(hash, handshake_init_hash, NOISE_HASH_LEN);
+	memcpy(chaining_key, handshake_init_chaining_key, NOISE_HASH_LEN);
+	mix_hash(hash, remote_static, NOISE_PUBLIC_KEY_LEN);
+}
+
+static void message_encrypt(u8 *dst_ciphertext, const u8 *src_plaintext, size_t src_len, u8 key[NOISE_SYMMETRIC_KEY_LEN], u8 hash[NOISE_HASH_LEN])
+{
+	chacha20poly1305_encrypt(dst_ciphertext, src_plaintext, src_len, hash, NOISE_HASH_LEN, 0 /* Always zero for Noise_IK */, key);
+	mix_hash(hash, dst_ciphertext, noise_encrypted_len(src_len));
+}
+
+static bool message_decrypt(u8 *dst_plaintext, const u8 *src_ciphertext, size_t src_len, u8 key[NOISE_SYMMETRIC_KEY_LEN], u8 hash[NOISE_HASH_LEN])
+{
+	if (!chacha20poly1305_decrypt(dst_plaintext, src_ciphertext, src_len, hash, NOISE_HASH_LEN, 0 /* Always zero for Noise_IK */, key))
+		return false;
+	mix_hash(hash, src_ciphertext, src_len);
+	return true;
+}
+
+static void message_ephemeral(u8 ephemeral_dst[NOISE_PUBLIC_KEY_LEN], const u8 ephemeral_src[NOISE_PUBLIC_KEY_LEN], u8 chaining_key[NOISE_HASH_LEN], u8 hash[NOISE_HASH_LEN])
+{
+	if (ephemeral_dst != ephemeral_src)
+		memcpy(ephemeral_dst, ephemeral_src, NOISE_PUBLIC_KEY_LEN);
+	mix_hash(hash, ephemeral_src, NOISE_PUBLIC_KEY_LEN);
+	kdf(chaining_key, NULL, NULL, ephemeral_src, NOISE_HASH_LEN, 0, 0, NOISE_PUBLIC_KEY_LEN, chaining_key);
+}
+
+static void tai64n_now(u8 output[NOISE_TIMESTAMP_LEN])
+{
+	struct timespec64 now;
+
+	getnstimeofday64(&now);
+	/* https://cr.yp.to/libtai/tai64.html */
+	*(__be64 *)output = cpu_to_be64(4611686018427387914ULL + now.tv_sec);
+	*(__be32 *)(output + sizeof(__be64)) = cpu_to_be32(now.tv_nsec);
+}
+
+bool noise_handshake_create_initiation(struct message_handshake_initiation *dst, struct noise_handshake *handshake)
+{
+	u8 timestamp[NOISE_TIMESTAMP_LEN];
+	u8 key[NOISE_SYMMETRIC_KEY_LEN];
+	bool ret = false;
+
+	down_read(&handshake->static_identity->lock);
+	down_write(&handshake->lock);
+
+	if (unlikely(!handshake->static_identity->has_identity))
+		goto out;
+
+	dst->header.type = cpu_to_le32(MESSAGE_HANDSHAKE_INITIATION);
+
+	handshake_init(handshake->chaining_key, handshake->hash, handshake->remote_static);
+
+	/* e */
+	curve25519_generate_secret(handshake->ephemeral_private);
+	if (!curve25519_generate_public(dst->unencrypted_ephemeral, handshake->ephemeral_private))
+		goto out;
+	message_ephemeral(dst->unencrypted_ephemeral, dst->unencrypted_ephemeral, handshake->chaining_key, handshake->hash);
+
+	/* es */
+	if (!mix_dh(handshake->chaining_key, key, handshake->ephemeral_private, handshake->remote_static))
+		goto out;
+
+	/* s */
+	message_encrypt(dst->encrypted_static, handshake->static_identity->static_public, NOISE_PUBLIC_KEY_LEN, key, handshake->hash);
+
+	/* ss */
+	kdf(handshake->chaining_key, key, NULL, handshake->precomputed_static_static, NOISE_HASH_LEN, NOISE_SYMMETRIC_KEY_LEN, 0, NOISE_PUBLIC_KEY_LEN, handshake->chaining_key);
+
+	/* {t} */
+	tai64n_now(timestamp);
+	message_encrypt(dst->encrypted_timestamp, timestamp, NOISE_TIMESTAMP_LEN, key, handshake->hash);
+
+	dst->sender_index = index_hashtable_insert(&handshake->entry.peer->device->index_hashtable, &handshake->entry);
+
+	handshake->state = HANDSHAKE_CREATED_INITIATION;
+	ret = true;
+
+out:
+	up_write(&handshake->lock);
+	up_read(&handshake->static_identity->lock);
+	memzero_explicit(key, NOISE_SYMMETRIC_KEY_LEN);
+	return ret;
+}
+
+struct wireguard_peer *noise_handshake_consume_initiation(struct message_handshake_initiation *src, struct wireguard_device *wg)
+{
+	bool replay_attack, flood_attack;
+	u8 s[NOISE_PUBLIC_KEY_LEN];
+	u8 e[NOISE_PUBLIC_KEY_LEN];
+	u8 t[NOISE_TIMESTAMP_LEN];
+	struct noise_handshake *handshake;
+	struct wireguard_peer *wg_peer = NULL;
+	u8 key[NOISE_SYMMETRIC_KEY_LEN];
+	u8 hash[NOISE_HASH_LEN];
+	u8 chaining_key[NOISE_HASH_LEN];
+
+	down_read(&wg->static_identity.lock);
+	if (unlikely(!wg->static_identity.has_identity))
+		goto out;
+
+	handshake_init(chaining_key, hash, wg->static_identity.static_public);
+
+	/* e */
+	message_ephemeral(e, src->unencrypted_ephemeral, chaining_key, hash);
+
+	/* es */
+	if (!mix_dh(chaining_key, key, wg->static_identity.static_private, e))
+		goto out;
+
+	/* s */
+	if (!message_decrypt(s, src->encrypted_static, sizeof(src->encrypted_static), key, hash))
+		goto out;
+
+	/* Lookup which peer we're actually talking to */
+	wg_peer = pubkey_hashtable_lookup(&wg->peer_hashtable, s);
+	if (!wg_peer)
+		goto out;
+	handshake = &wg_peer->handshake;
+
+	/* ss */
+	kdf(chaining_key, key, NULL, handshake->precomputed_static_static, NOISE_HASH_LEN, NOISE_SYMMETRIC_KEY_LEN, 0, NOISE_PUBLIC_KEY_LEN, chaining_key);
+
+	/* {t} */
+	if (!message_decrypt(t, src->encrypted_timestamp, sizeof(src->encrypted_timestamp), key, hash))
+		goto out;
+
+	down_read(&handshake->lock);
+	replay_attack = memcmp(t, handshake->latest_timestamp, NOISE_TIMESTAMP_LEN) <= 0;
+	flood_attack = !time_is_before_jiffies64(handshake->last_initiation_consumption + INITIATIONS_PER_SECOND);
+	up_read(&handshake->lock);
+	if (replay_attack || flood_attack) {
+		peer_put(wg_peer);
+		wg_peer = NULL;
+		goto out;
+	}
+
+	/* Success! Copy everything to peer */
+	down_write(&handshake->lock);
+	memcpy(handshake->remote_ephemeral, e, NOISE_PUBLIC_KEY_LEN);
+	memcpy(handshake->latest_timestamp, t, NOISE_TIMESTAMP_LEN);
+	memcpy(handshake->hash, hash, NOISE_HASH_LEN);
+	memcpy(handshake->chaining_key, chaining_key, NOISE_HASH_LEN);
+	handshake->remote_index = src->sender_index;
+	handshake->last_initiation_consumption = get_jiffies_64();
+	handshake->state = HANDSHAKE_CONSUMED_INITIATION;
+	up_write(&handshake->lock);
+
+out:
+	memzero_explicit(key, NOISE_SYMMETRIC_KEY_LEN);
+	memzero_explicit(hash, NOISE_HASH_LEN);
+	memzero_explicit(chaining_key, NOISE_HASH_LEN);
+	up_read(&wg->static_identity.lock);
+	return wg_peer;
+}
+
+bool noise_handshake_create_response(struct message_handshake_response *dst, struct noise_handshake *handshake)
+{
+	bool ret = false;
+	u8 key[NOISE_SYMMETRIC_KEY_LEN];
+
+	down_read(&handshake->static_identity->lock);
+	down_write(&handshake->lock);
+
+	if (handshake->state != HANDSHAKE_CONSUMED_INITIATION)
+		goto out;
+
+	dst->header.type = cpu_to_le32(MESSAGE_HANDSHAKE_RESPONSE);
+	dst->receiver_index = handshake->remote_index;
+
+	/* e */
+	curve25519_generate_secret(handshake->ephemeral_private);
+	if (!curve25519_generate_public(dst->unencrypted_ephemeral, handshake->ephemeral_private))
+		goto out;
+	message_ephemeral(dst->unencrypted_ephemeral, dst->unencrypted_ephemeral, handshake->chaining_key, handshake->hash);
+
+	/* ee */
+	if (!mix_dh(handshake->chaining_key, NULL, handshake->ephemeral_private, handshake->remote_ephemeral))
+		goto out;
+
+	/* se */
+	if (!mix_dh(handshake->chaining_key, NULL, handshake->ephemeral_private, handshake->remote_static))
+		goto out;
+
+	/* psk */
+	mix_psk(handshake->chaining_key, handshake->hash, key, handshake->preshared_key);
+
+	/* {} */
+	message_encrypt(dst->encrypted_nothing, NULL, 0, key, handshake->hash);
+
+	dst->sender_index = index_hashtable_insert(&handshake->entry.peer->device->index_hashtable, &handshake->entry);
+
+	handshake->state = HANDSHAKE_CREATED_RESPONSE;
+	ret = true;
+
+out:
+	up_write(&handshake->lock);
+	up_read(&handshake->static_identity->lock);
+	memzero_explicit(key, NOISE_SYMMETRIC_KEY_LEN);
+	return ret;
+}
+
+struct wireguard_peer *noise_handshake_consume_response(struct message_handshake_response *src, struct wireguard_device *wg)
+{
+	struct noise_handshake *handshake;
+	struct wireguard_peer *ret_peer = NULL;
+	u8 key[NOISE_SYMMETRIC_KEY_LEN];
+	u8 hash[NOISE_HASH_LEN];
+	u8 chaining_key[NOISE_HASH_LEN];
+	u8 e[NOISE_PUBLIC_KEY_LEN];
+	u8 ephemeral_private[NOISE_PUBLIC_KEY_LEN];
+	u8 static_private[NOISE_PUBLIC_KEY_LEN];
+	enum noise_handshake_state state = HANDSHAKE_ZEROED;
+
+	down_read(&wg->static_identity.lock);
+
+	if (unlikely(!wg->static_identity.has_identity))
+		goto out;
+
+	handshake = (struct noise_handshake *)index_hashtable_lookup(&wg->index_hashtable, INDEX_HASHTABLE_HANDSHAKE, src->receiver_index);
+	if (unlikely(!handshake))
+		goto out;
+
+	down_read(&handshake->lock);
+	state = handshake->state;
+	memcpy(hash, handshake->hash, NOISE_HASH_LEN);
+	memcpy(chaining_key, handshake->chaining_key, NOISE_HASH_LEN);
+	memcpy(ephemeral_private, handshake->ephemeral_private, NOISE_PUBLIC_KEY_LEN);
+	up_read(&handshake->lock);
+
+	if (state != HANDSHAKE_CREATED_INITIATION)
+		goto fail;
+
+	/* e */
+	message_ephemeral(e, src->unencrypted_ephemeral, chaining_key, hash);
+
+	/* ee */
+	if (!mix_dh(chaining_key, NULL, ephemeral_private, e))
+		goto out;
+
+	/* se */
+	if (!mix_dh(chaining_key, NULL, wg->static_identity.static_private, e))
+		goto out;
+
+	/* psk */
+	mix_psk(chaining_key, hash, key, handshake->preshared_key);
+
+	/* {} */
+	if (!message_decrypt(NULL, src->encrypted_nothing, sizeof(src->encrypted_nothing), key, hash))
+		goto fail;
+
+	/* Success! Copy everything to peer */
+	down_write(&handshake->lock);
+	/* It's important to check that the state is still the same, while we have an exclusive lock */
+	if (handshake->state != state) {
+		up_write(&handshake->lock);
+		goto fail;
+	}
+	memcpy(handshake->remote_ephemeral, e, NOISE_PUBLIC_KEY_LEN);
+	memcpy(handshake->hash, hash, NOISE_HASH_LEN);
+	memcpy(handshake->chaining_key, chaining_key, NOISE_HASH_LEN);
+	handshake->remote_index = src->sender_index;
+	handshake->state = HANDSHAKE_CONSUMED_RESPONSE;
+	up_write(&handshake->lock);
+	ret_peer = handshake->entry.peer;
+	goto out;
+
+fail:
+	peer_put(handshake->entry.peer);
+out:
+	memzero_explicit(key, NOISE_SYMMETRIC_KEY_LEN);
+	memzero_explicit(hash, NOISE_HASH_LEN);
+	memzero_explicit(chaining_key, NOISE_HASH_LEN);
+	memzero_explicit(ephemeral_private, NOISE_PUBLIC_KEY_LEN);
+	memzero_explicit(static_private, NOISE_PUBLIC_KEY_LEN);
+	up_read(&wg->static_identity.lock);
+	return ret_peer;
+}
+
+bool noise_handshake_begin_session(struct noise_handshake *handshake, struct noise_keypairs *keypairs)
+{
+	struct noise_keypair *new_keypair;
+
+	down_write(&handshake->lock);
+	if (handshake->state != HANDSHAKE_CREATED_RESPONSE && handshake->state != HANDSHAKE_CONSUMED_RESPONSE)
+		goto fail;
+
+	new_keypair = keypair_create(handshake->entry.peer);
+	if (!new_keypair)
+		goto fail;
+	new_keypair->i_am_the_initiator = handshake->state == HANDSHAKE_CONSUMED_RESPONSE;
+	new_keypair->remote_index = handshake->remote_index;
+
+	if (new_keypair->i_am_the_initiator)
+		derive_keys(&new_keypair->sending, &new_keypair->receiving, handshake->chaining_key);
+	else
+		derive_keys(&new_keypair->receiving, &new_keypair->sending, handshake->chaining_key);
+
+	handshake_zero(handshake);
+	add_new_keypair(keypairs, new_keypair);
+	net_dbg_ratelimited("%s: Keypair %llu created for peer %llu\n", new_keypair->entry.peer->device->dev->name, new_keypair->internal_id, new_keypair->entry.peer->internal_id);
+	WARN_ON(!index_hashtable_replace(&handshake->entry.peer->device->index_hashtable, &handshake->entry, &new_keypair->entry));
+	up_write(&handshake->lock);
+
+	return true;
+
+fail:
+	up_write(&handshake->lock);
+	return false;
+}
diff --git b/net/wireguard/noise.h b/net/wireguard/noise.h
new file mode 100644
index 0000000..2ce40db
--- /dev/null
+++ b/net/wireguard/noise.h
@@ -0,0 +1,117 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ *
+ * See doc/protocol.md and https://github.com/trevp/noise/blob/master/noise.md for more info
+ */
+
+#ifndef _WG_NOISE_H
+#define _WG_NOISE_H
+
+#include "messages.h"
+#include "hashtables.h"
+
+#include <linux/types.h>
+#include <linux/spinlock.h>
+#include <linux/atomic.h>
+#include <linux/rwsem.h>
+#include <linux/mutex.h>
+#include <linux/jiffies.h>
+#include <linux/kref.h>
+
+union noise_counter {
+	struct {
+		u64 counter;
+		unsigned long backtrack[COUNTER_BITS_TOTAL / BITS_PER_LONG];
+		spinlock_t lock;
+	} receive;
+	atomic64_t counter;
+};
+
+struct noise_symmetric_key {
+	u8 key[NOISE_SYMMETRIC_KEY_LEN];
+	union noise_counter counter;
+	u64 birthdate;
+	bool is_valid;
+};
+
+struct noise_keypair {
+	struct index_hashtable_entry entry;
+	struct noise_symmetric_key sending;
+	struct noise_symmetric_key receiving;
+	__le32 remote_index;
+	bool i_am_the_initiator;
+	struct kref refcount;
+	struct rcu_head rcu;
+	u64 internal_id;
+};
+
+struct noise_keypairs {
+	struct noise_keypair __rcu *current_keypair;
+	struct noise_keypair __rcu *previous_keypair;
+	struct noise_keypair __rcu *next_keypair;
+	spinlock_t keypair_update_lock;
+};
+
+struct noise_static_identity {
+	bool has_identity;
+	u8 static_public[NOISE_PUBLIC_KEY_LEN];
+	u8 static_private[NOISE_PUBLIC_KEY_LEN];
+	struct rw_semaphore lock;
+};
+
+enum noise_handshake_state {
+	HANDSHAKE_ZEROED,
+	HANDSHAKE_CREATED_INITIATION,
+	HANDSHAKE_CONSUMED_INITIATION,
+	HANDSHAKE_CREATED_RESPONSE,
+	HANDSHAKE_CONSUMED_RESPONSE
+};
+
+struct noise_handshake {
+	struct index_hashtable_entry entry;
+
+	enum noise_handshake_state state;
+	u64 last_initiation_consumption;
+
+	struct noise_static_identity *static_identity;
+
+	u8 ephemeral_private[NOISE_PUBLIC_KEY_LEN];
+	u8 remote_static[NOISE_PUBLIC_KEY_LEN];
+	u8 remote_ephemeral[NOISE_PUBLIC_KEY_LEN];
+	u8 precomputed_static_static[NOISE_PUBLIC_KEY_LEN];
+
+	u8 preshared_key[NOISE_SYMMETRIC_KEY_LEN];
+
+	u8 hash[NOISE_HASH_LEN];
+	u8 chaining_key[NOISE_HASH_LEN];
+
+	u8 latest_timestamp[NOISE_TIMESTAMP_LEN];
+	__le32 remote_index;
+
+	/* Protects all members except the immutable (after noise_handshake_init): remote_static, precomputed_static_static, static_identity */
+	struct rw_semaphore lock;
+};
+
+struct wireguard_device;
+
+void noise_init(void);
+bool noise_handshake_init(struct noise_handshake *handshake, struct noise_static_identity *static_identity, const u8 peer_public_key[NOISE_PUBLIC_KEY_LEN], const u8 peer_preshared_key[NOISE_SYMMETRIC_KEY_LEN], struct wireguard_peer *peer);
+void noise_handshake_clear(struct noise_handshake *handshake);
+void noise_keypair_put(struct noise_keypair *keypair);
+struct noise_keypair *noise_keypair_get(struct noise_keypair *keypair);
+void noise_keypairs_clear(struct noise_keypairs *keypairs);
+bool noise_received_with_keypair(struct noise_keypairs *keypairs, struct noise_keypair *received_keypair);
+
+void noise_set_static_identity_private_key(struct noise_static_identity *static_identity, const u8 private_key[NOISE_PUBLIC_KEY_LEN]);
+bool noise_precompute_static_static(struct wireguard_peer *peer);
+
+bool noise_handshake_create_initiation(struct message_handshake_initiation *dst, struct noise_handshake *handshake);
+struct wireguard_peer *noise_handshake_consume_initiation(struct message_handshake_initiation *src, struct wireguard_device *wg);
+
+bool noise_handshake_create_response(struct message_handshake_response *dst, struct noise_handshake *peer);
+struct wireguard_peer *noise_handshake_consume_response(struct message_handshake_response *src, struct wireguard_device *wg);
+
+bool noise_handshake_begin_session(struct noise_handshake *handshake, struct noise_keypairs *keypairs);
+
+#endif /* _WG_NOISE_H */
diff --git b/net/wireguard/peer.c b/net/wireguard/peer.c
new file mode 100644
index 0000000..6945e6f
--- /dev/null
+++ b/net/wireguard/peer.c
@@ -0,0 +1,136 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#include "peer.h"
+#include "device.h"
+#include "queueing.h"
+#include "timers.h"
+#include "hashtables.h"
+#include "noise.h"
+
+#include <linux/kref.h>
+#include <linux/lockdep.h>
+#include <linux/rcupdate.h>
+#include <linux/list.h>
+
+static atomic64_t peer_counter = ATOMIC64_INIT(0);
+
+struct wireguard_peer *peer_create(struct wireguard_device *wg, const u8 public_key[NOISE_PUBLIC_KEY_LEN], const u8 preshared_key[NOISE_SYMMETRIC_KEY_LEN])
+{
+	struct wireguard_peer *peer;
+
+	lockdep_assert_held(&wg->device_update_lock);
+
+	if (wg->num_peers >= MAX_PEERS_PER_DEVICE)
+		return NULL;
+	++wg->num_peers;
+
+	peer = kzalloc(sizeof(struct wireguard_peer), GFP_KERNEL);
+	if (!peer)
+		return NULL;
+	peer->device = wg;
+
+	if (dst_cache_init(&peer->endpoint_cache, GFP_KERNEL)) {
+		kfree(peer);
+		return NULL;
+	}
+
+	peer->internal_id = atomic64_inc_return(&peer_counter);
+	peer->serial_work_cpu = nr_cpumask_bits;
+	cookie_init(&peer->latest_cookie);
+	if (!noise_handshake_init(&peer->handshake, &wg->static_identity, public_key, preshared_key, peer)) {
+		kfree(peer);
+		return NULL;
+	}
+	timers_init(peer);
+	cookie_checker_precompute_peer_keys(peer);
+	spin_lock_init(&peer->keypairs.keypair_update_lock);
+	INIT_WORK(&peer->transmit_handshake_work, packet_handshake_send_worker);
+	rwlock_init(&peer->endpoint_lock);
+	kref_init(&peer->refcount);
+	packet_queue_init(&peer->tx_queue, packet_tx_worker, false, MAX_QUEUED_PACKETS);
+	packet_queue_init(&peer->rx_queue, packet_rx_worker, false, MAX_QUEUED_PACKETS);
+	skb_queue_head_init(&peer->staged_packet_queue);
+	list_add_tail(&peer->peer_list, &wg->peer_list);
+	pubkey_hashtable_add(&wg->peer_hashtable, peer);
+	peer->last_sent_handshake = get_jiffies_64() - REKEY_TIMEOUT - HZ;
+	pr_debug("%s: Peer %llu created\n", wg->dev->name, peer->internal_id);
+	return peer;
+}
+
+struct wireguard_peer *peer_get(struct wireguard_peer *peer)
+{
+	RCU_LOCKDEP_WARN(!rcu_read_lock_bh_held(), "Taking peer reference without holding the RCU read lock");
+	if (unlikely(!peer || !kref_get_unless_zero(&peer->refcount)))
+		return NULL;
+	return peer;
+}
+
+struct wireguard_peer *peer_rcu_get(struct wireguard_peer *peer)
+{
+	rcu_read_lock_bh();
+	peer = peer_get(peer);
+	rcu_read_unlock_bh();
+	return peer;
+}
+
+/* We have a separate "remove" function to get rid of the final reference because
+ * peer_list, clearing handshakes, and flushing all require mutexes which requires
+ * sleeping, which must only be done from certain contexts.
+ */
+void peer_remove(struct wireguard_peer *peer)
+{
+	if (unlikely(!peer))
+		return;
+	lockdep_assert_held(&peer->device->device_update_lock);
+	allowedips_remove_by_peer(&peer->device->peer_allowedips, peer, &peer->device->device_update_lock);
+	pubkey_hashtable_remove(&peer->device->peer_hashtable, peer);
+	skb_queue_purge(&peer->staged_packet_queue);
+	noise_handshake_clear(&peer->handshake);
+	noise_keypairs_clear(&peer->keypairs);
+	list_del_init(&peer->peer_list);
+	timers_stop(peer);
+	flush_workqueue(peer->device->packet_crypt_wq); /* The first flush is for encrypt/decrypt step. */
+	flush_workqueue(peer->device->packet_crypt_wq); /* The second flush is for send/receive step. */
+	flush_workqueue(peer->device->handshake_send_wq);
+	--peer->device->num_peers;
+	peer_put(peer);
+}
+
+static void rcu_release(struct rcu_head *rcu)
+{
+	struct wireguard_peer *peer = container_of(rcu, struct wireguard_peer, rcu);
+
+	pr_debug("%s: Peer %llu (%pISpfsc) destroyed\n", peer->device->dev->name, peer->internal_id, &peer->endpoint.addr);
+	dst_cache_destroy(&peer->endpoint_cache);
+	packet_queue_free(&peer->rx_queue, false);
+	packet_queue_free(&peer->tx_queue, false);
+	kzfree(peer);
+}
+
+static void kref_release(struct kref *refcount)
+{
+	struct wireguard_peer *peer = container_of(refcount, struct wireguard_peer, refcount);
+
+	index_hashtable_remove(&peer->device->index_hashtable, &peer->handshake.entry);
+	skb_queue_purge(&peer->staged_packet_queue);
+	call_rcu_bh(&peer->rcu, rcu_release);
+}
+
+void peer_put(struct wireguard_peer *peer)
+{
+	if (unlikely(!peer))
+		return;
+	kref_put(&peer->refcount, kref_release);
+}
+
+void peer_remove_all(struct wireguard_device *wg)
+{
+	struct wireguard_peer *peer, *temp;
+
+	lockdep_assert_held(&wg->device_update_lock);
+	list_for_each_entry_safe(peer, temp, &wg->peer_list, peer_list)
+		peer_remove(peer);
+}
diff --git b/net/wireguard/peer.h b/net/wireguard/peer.h
new file mode 100644
index 0000000..3b66e45
--- /dev/null
+++ b/net/wireguard/peer.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#ifndef _WG_PEER_H
+#define _WG_PEER_H
+
+#include "device.h"
+#include "noise.h"
+#include "cookie.h"
+
+#include <linux/types.h>
+#include <linux/netfilter.h>
+#include <linux/spinlock.h>
+#include <linux/kref.h>
+#include <net/dst_cache.h>
+
+struct wireguard_device;
+
+struct endpoint {
+	union {
+		struct sockaddr addr;
+		struct sockaddr_in addr4;
+		struct sockaddr_in6 addr6;
+	};
+	union {
+		struct {
+			struct in_addr src4;
+			int src_if4; /* Essentially the same as addr6->scope_id */
+		};
+		struct in6_addr src6;
+	};
+};
+
+struct wireguard_peer {
+	struct wireguard_device *device;
+	struct crypt_queue tx_queue, rx_queue;
+	struct sk_buff_head staged_packet_queue;
+	int serial_work_cpu;
+	struct noise_keypairs keypairs;
+	struct endpoint endpoint;
+	struct dst_cache endpoint_cache;
+	rwlock_t endpoint_lock;
+	struct noise_handshake handshake;
+	u64 last_sent_handshake;
+	struct work_struct transmit_handshake_work, clear_peer_work;
+	struct cookie latest_cookie;
+	struct hlist_node pubkey_hash;
+	u64 rx_bytes, tx_bytes;
+	struct timer_list timer_retransmit_handshake, timer_send_keepalive, timer_new_handshake, timer_zero_key_material, timer_persistent_keepalive;
+	unsigned int timer_handshake_attempts;
+	unsigned long persistent_keepalive_interval;
+	bool timers_enabled, timer_need_another_keepalive, sent_lastminute_handshake;
+	struct timespec walltime_last_handshake;
+	struct kref refcount;
+	struct rcu_head rcu;
+	struct list_head peer_list;
+	u64 internal_id;
+};
+
+struct wireguard_peer *peer_create(struct wireguard_device *wg, const u8 public_key[NOISE_PUBLIC_KEY_LEN], const u8 preshared_key[NOISE_SYMMETRIC_KEY_LEN]);
+
+struct wireguard_peer *peer_get(struct wireguard_peer *peer);
+struct wireguard_peer *peer_rcu_get(struct wireguard_peer *peer);
+
+void peer_put(struct wireguard_peer *peer);
+void peer_remove(struct wireguard_peer *peer);
+void peer_remove_all(struct wireguard_device *wg);
+
+struct wireguard_peer *peer_lookup_by_index(struct wireguard_device *wg, u32 index);
+
+#endif /* _WG_PEER_H */
diff --git b/net/wireguard/queueing.c b/net/wireguard/queueing.c
new file mode 100644
index 0000000..e38262a
--- /dev/null
+++ b/net/wireguard/queueing.c
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#include "queueing.h"
+
+struct multicore_worker __percpu *packet_alloc_percpu_multicore_worker(work_func_t function, void *ptr)
+{
+	int cpu;
+	struct multicore_worker __percpu *worker = alloc_percpu(struct multicore_worker);
+
+	if (!worker)
+		return NULL;
+
+	for_each_possible_cpu(cpu) {
+		per_cpu_ptr(worker, cpu)->ptr = ptr;
+		INIT_WORK(&per_cpu_ptr(worker, cpu)->work, function);
+	}
+	return worker;
+}
+
+int packet_queue_init(struct crypt_queue *queue, work_func_t function, bool multicore, unsigned int len)
+{
+	int ret;
+
+	memset(queue, 0, sizeof(*queue));
+	ret = ptr_ring_init(&queue->ring, len, GFP_KERNEL);
+	if (ret)
+		return ret;
+	if (multicore) {
+		queue->worker = packet_alloc_percpu_multicore_worker(function, queue);
+		if (!queue->worker)
+			return -ENOMEM;
+	} else
+		INIT_WORK(&queue->work, function);
+	return 0;
+}
+
+void packet_queue_free(struct crypt_queue *queue, bool multicore)
+{
+	if (multicore)
+		free_percpu(queue->worker);
+	WARN_ON(!ptr_ring_empty_bh(&queue->ring));
+	ptr_ring_cleanup(&queue->ring, NULL);
+}
diff --git b/net/wireguard/queueing.h b/net/wireguard/queueing.h
new file mode 100644
index 0000000..967753f
--- /dev/null
+++ b/net/wireguard/queueing.h
@@ -0,0 +1,142 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#ifndef _WG_QUEUEING_H
+#define _WG_QUEUEING_H
+
+#include "peer.h"
+#include <linux/types.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+
+struct wireguard_device;
+struct wireguard_peer;
+struct multicore_worker;
+struct crypt_queue;
+struct sk_buff;
+
+/* queueing.c APIs: */
+int packet_queue_init(struct crypt_queue *queue, work_func_t function, bool multicore, unsigned int len);
+void packet_queue_free(struct crypt_queue *queue, bool multicore);
+struct multicore_worker __percpu *packet_alloc_percpu_multicore_worker(work_func_t function, void *ptr);
+
+/* receive.c APIs: */
+void packet_receive(struct wireguard_device *wg, struct sk_buff *skb);
+void packet_handshake_receive_worker(struct work_struct *work);
+/* Workqueue workers: */
+void packet_rx_worker(struct work_struct *work);
+void packet_decrypt_worker(struct work_struct *work);
+
+/* send.c APIs: */
+void packet_send_queued_handshake_initiation(struct wireguard_peer *peer, bool is_retry);
+void packet_send_handshake_response(struct wireguard_peer *peer);
+void packet_send_handshake_cookie(struct wireguard_device *wg, struct sk_buff *initiating_skb, __le32 sender_index);
+void packet_send_keepalive(struct wireguard_peer *peer);
+void packet_send_staged_packets(struct wireguard_peer *peer);
+/* Workqueue workers: */
+void packet_handshake_send_worker(struct work_struct *work);
+void packet_tx_worker(struct work_struct *work);
+void packet_encrypt_worker(struct work_struct *work);
+
+enum packet_state { PACKET_STATE_UNCRYPTED, PACKET_STATE_CRYPTED, PACKET_STATE_DEAD };
+struct packet_cb {
+	u64 nonce;
+	struct noise_keypair *keypair;
+	atomic_t state;
+	u8 ds;
+};
+#define PACKET_PEER(skb) (((struct packet_cb *)skb->cb)->keypair->entry.peer)
+#define PACKET_CB(skb) ((struct packet_cb *)skb->cb)
+
+/* Returns either the correct skb->protocol value, or 0 if invalid. */
+static inline __be16 skb_examine_untrusted_ip_hdr(struct sk_buff *skb)
+{
+	if (skb_network_header(skb) >= skb->head && (skb_network_header(skb) + sizeof(struct iphdr)) <= skb_tail_pointer(skb) && ip_hdr(skb)->version == 4)
+		return htons(ETH_P_IP);
+	if (skb_network_header(skb) >= skb->head && (skb_network_header(skb) + sizeof(struct ipv6hdr)) <= skb_tail_pointer(skb) && ipv6_hdr(skb)->version == 6)
+		return htons(ETH_P_IPV6);
+	return 0;
+}
+
+static inline void skb_reset(struct sk_buff *skb)
+{
+	skb_scrub_packet(skb, false);
+	memset(&skb->headers_start, 0, offsetof(struct sk_buff, headers_end) - offsetof(struct sk_buff, headers_start));
+	skb->queue_mapping = 0;
+	skb->nohdr = 0;
+	skb->peeked = 0;
+	skb->mac_len = 0;
+	skb->dev = NULL;
+#ifdef CONFIG_NET_SCHED
+	skb->tc_index = 0;
+	skb_reset_tc(skb);
+#endif
+	skb->hdr_len = skb_headroom(skb);
+	skb_reset_mac_header(skb);
+	skb_reset_network_header(skb);
+	skb_probe_transport_header(skb, 0);
+	skb_reset_inner_headers(skb);
+}
+
+static inline int cpumask_choose_online(int *stored_cpu, unsigned int id)
+{
+	unsigned int cpu = *stored_cpu, cpu_index, i;
+
+	if (unlikely(cpu == nr_cpumask_bits || !cpumask_test_cpu(cpu, cpu_online_mask))) {
+		cpu_index = id % cpumask_weight(cpu_online_mask);
+		cpu = cpumask_first(cpu_online_mask);
+		for (i = 0; i < cpu_index; ++i)
+			cpu = cpumask_next(cpu, cpu_online_mask);
+		*stored_cpu = cpu;
+	}
+	return cpu;
+}
+
+/* This function is racy, in the sense that next is unlocked, so it could return
+ * the same CPU twice. A race-free version of this would be to instead store an
+ * atomic sequence number, do an increment-and-return, and then iterate through
+ * every possible CPU until we get to that index -- choose_cpu. However that's
+ * a bit slower, and it doesn't seem like this potential race actually introduces
+ * any performance loss, so we live with it.
+ */
+static inline int cpumask_next_online(int *next)
+{
+	int cpu = *next;
+
+	while (unlikely(!cpumask_test_cpu(cpu, cpu_online_mask)))
+		cpu = cpumask_next(cpu, cpu_online_mask) % nr_cpumask_bits;
+	*next = cpumask_next(cpu, cpu_online_mask) % nr_cpumask_bits;
+	return cpu;
+}
+
+static inline int queue_enqueue_per_device_and_peer(struct crypt_queue *device_queue, struct crypt_queue *peer_queue, struct sk_buff *skb, struct workqueue_struct *wq, int *next_cpu)
+{
+	int cpu;
+
+	atomic_set(&PACKET_CB(skb)->state, PACKET_STATE_UNCRYPTED);
+	if (unlikely(ptr_ring_produce_bh(&peer_queue->ring, skb)))
+		return -ENOSPC;
+	cpu = cpumask_next_online(next_cpu);
+	if (unlikely(ptr_ring_produce_bh(&device_queue->ring, skb)))
+		return -EPIPE;
+	queue_work_on(cpu, wq, &per_cpu_ptr(device_queue->worker, cpu)->work);
+	return 0;
+}
+
+static inline void queue_enqueue_per_peer(struct crypt_queue *queue, struct sk_buff *skb, enum packet_state state)
+{
+	struct wireguard_peer *peer = peer_rcu_get(PACKET_PEER(skb));
+
+	atomic_set(&PACKET_CB(skb)->state, state);
+	queue_work_on(cpumask_choose_online(&peer->serial_work_cpu, peer->internal_id), peer->device->packet_crypt_wq, &queue->work);
+	peer_put(peer);
+}
+
+#ifdef DEBUG
+bool packet_counter_selftest(void);
+#endif
+
+#endif /* _WG_QUEUEING_H */
diff --git b/net/wireguard/ratelimiter.c b/net/wireguard/ratelimiter.c
new file mode 100644
index 0000000..3f89e94
--- /dev/null
+++ b/net/wireguard/ratelimiter.c
@@ -0,0 +1,199 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#include "ratelimiter.h"
+#include <linux/siphash.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <net/ip.h>
+
+static struct kmem_cache *entry_cache;
+static hsiphash_key_t key;
+static spinlock_t table_lock = __SPIN_LOCK_UNLOCKED("ratelimiter_table_lock");
+static atomic64_t refcnt = ATOMIC64_INIT(0);
+static atomic_t total_entries = ATOMIC_INIT(0);
+static unsigned int max_entries, table_size;
+static void gc_entries(struct work_struct *);
+static DECLARE_DEFERRABLE_WORK(gc_work, gc_entries);
+static struct hlist_head *table_v4;
+#if IS_ENABLED(CONFIG_IPV6)
+static struct hlist_head *table_v6;
+#endif
+
+struct ratelimiter_entry {
+	u64 last_time_ns, tokens;
+	__be64 ip;
+	void *net;
+	spinlock_t lock;
+	struct hlist_node hash;
+	struct rcu_head rcu;
+};
+
+enum {
+	PACKETS_PER_SECOND = 20,
+	PACKETS_BURSTABLE = 5,
+	PACKET_COST = NSEC_PER_SEC / PACKETS_PER_SECOND,
+	TOKEN_MAX = PACKET_COST * PACKETS_BURSTABLE
+};
+
+static void entry_free(struct rcu_head *rcu)
+{
+	kmem_cache_free(entry_cache, container_of(rcu, struct ratelimiter_entry, rcu));
+	atomic_dec(&total_entries);
+}
+
+static void entry_uninit(struct ratelimiter_entry *entry)
+{
+	hlist_del_rcu(&entry->hash);
+	call_rcu(&entry->rcu, entry_free);
+}
+
+/* Calling this function with a NULL work uninits all entries. */
+static void gc_entries(struct work_struct *work)
+{
+	unsigned int i;
+	struct ratelimiter_entry *entry;
+	struct hlist_node *temp;
+	const u64 now = ktime_get_ns();
+
+	for (i = 0; i < table_size; ++i) {
+		spin_lock(&table_lock);
+		hlist_for_each_entry_safe(entry, temp, &table_v4[i], hash) {
+			if (unlikely(!work) || now - entry->last_time_ns > NSEC_PER_SEC)
+				entry_uninit(entry);
+		}
+#if IS_ENABLED(CONFIG_IPV6)
+		hlist_for_each_entry_safe(entry, temp, &table_v6[i], hash) {
+			if (unlikely(!work) || now - entry->last_time_ns > NSEC_PER_SEC)
+				entry_uninit(entry);
+		}
+#endif
+		spin_unlock(&table_lock);
+		if (likely(work))
+			cond_resched();
+	}
+	if (likely(work))
+		queue_delayed_work(system_power_efficient_wq, &gc_work, HZ);
+}
+
+bool ratelimiter_allow(struct sk_buff *skb, struct net *net)
+{
+	struct ratelimiter_entry *entry;
+	struct hlist_head *bucket;
+	struct { __be64 ip; u32 net; } data = { .net = (unsigned long)net & 0xffffffff };
+
+	if (skb->protocol == htons(ETH_P_IP)) {
+		data.ip = (__force __be64)ip_hdr(skb)->saddr;
+		bucket = &table_v4[hsiphash(&data, sizeof(u32) * 3, &key) & (table_size - 1)];
+	}
+#if IS_ENABLED(CONFIG_IPV6)
+	else if (skb->protocol == htons(ETH_P_IPV6)) {
+		memcpy(&data.ip, &ipv6_hdr(skb)->saddr, sizeof(__be64)); /* Only 64 bits */
+		bucket = &table_v6[hsiphash(&data, sizeof(u32) * 3, &key) & (table_size - 1)];
+	}
+#endif
+	else
+		return false;
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(entry, bucket, hash) {
+		if (entry->net == net && entry->ip == data.ip) {
+			u64 now, tokens;
+			bool ret;
+			/* Inspired by nft_limit.c, but this is actually a slightly different
+			 * algorithm. Namely, we incorporate the burst as part of the maximum
+			 * tokens, rather than as part of the rate.
+			 */
+			spin_lock(&entry->lock);
+			now = ktime_get_ns();
+			tokens = min_t(u64, TOKEN_MAX, entry->tokens + now - entry->last_time_ns);
+			entry->last_time_ns = now;
+			ret = tokens >= PACKET_COST;
+			entry->tokens = ret ? tokens - PACKET_COST : tokens;
+			spin_unlock(&entry->lock);
+			rcu_read_unlock();
+			return ret;
+		}
+	}
+	rcu_read_unlock();
+
+	if (atomic_inc_return(&total_entries) > max_entries)
+		goto err_oom;
+
+	entry = kmem_cache_alloc(entry_cache, GFP_KERNEL);
+	if (!entry)
+		goto err_oom;
+
+	entry->net = net;
+	entry->ip = data.ip;
+	INIT_HLIST_NODE(&entry->hash);
+	spin_lock_init(&entry->lock);
+	entry->last_time_ns = ktime_get_ns();
+	entry->tokens = TOKEN_MAX - PACKET_COST;
+	spin_lock(&table_lock);
+	hlist_add_head_rcu(&entry->hash, bucket);
+	spin_unlock(&table_lock);
+	return true;
+
+err_oom:
+	atomic_dec(&total_entries);
+	return false;
+}
+
+int ratelimiter_init(void)
+{
+	if (atomic64_inc_return(&refcnt) != 1)
+		return 0;
+
+	entry_cache = KMEM_CACHE(ratelimiter_entry, 0);
+	if (!entry_cache)
+		goto err;
+
+	/* xt_hashlimit.c uses a slightly different algorithm for ratelimiting,
+	 * but what it shares in common is that it uses a massive hashtable. So,
+	 * we borrow their wisdom about good table sizes on different systems
+	 * dependent on RAM. This calculation here comes from there.
+	 */
+	table_size = (totalram_pages > (1U << 30) / PAGE_SIZE) ? 8192 : max_t(unsigned long, 16, roundup_pow_of_two((totalram_pages << PAGE_SHIFT) / (1U << 14) / sizeof(struct hlist_head)));
+	max_entries = table_size * 8;
+
+	table_v4 = kvzalloc(table_size * sizeof(struct hlist_head), GFP_KERNEL);
+	if (!table_v4)
+		goto err_kmemcache;
+
+#if IS_ENABLED(CONFIG_IPV6)
+	table_v6 = kvzalloc(table_size * sizeof(struct hlist_head), GFP_KERNEL);
+	if (!table_v6) {
+		kvfree(table_v4);
+		goto err_kmemcache;
+	}
+#endif
+
+	queue_delayed_work(system_power_efficient_wq, &gc_work, HZ);
+	get_random_bytes(&key, sizeof(key));
+	return 0;
+
+err_kmemcache:
+	kmem_cache_destroy(entry_cache);
+err:
+	atomic64_dec(&refcnt);
+	return -ENOMEM;
+}
+
+void ratelimiter_uninit(void)
+{
+	if (atomic64_dec_return(&refcnt))
+		return;
+
+	cancel_delayed_work_sync(&gc_work);
+	gc_entries(NULL);
+	rcu_barrier();
+	kvfree(table_v4);
+#if IS_ENABLED(CONFIG_IPV6)
+	kvfree(table_v6);
+#endif
+	kmem_cache_destroy(entry_cache);
+}
+
+#include "selftest/ratelimiter.h"
diff --git b/net/wireguard/ratelimiter.h b/net/wireguard/ratelimiter.h
new file mode 100644
index 0000000..7833eb3
--- /dev/null
+++ b/net/wireguard/ratelimiter.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#ifndef _WG_RATELIMITER_H
+#define _WG_RATELIMITER_H
+
+#include <linux/skbuff.h>
+
+int ratelimiter_init(void);
+void ratelimiter_uninit(void);
+bool ratelimiter_allow(struct sk_buff *skb, struct net *net);
+
+#ifdef DEBUG
+bool ratelimiter_selftest(void);
+#endif
+
+#endif /* _WG_RATELIMITER_H */
diff --git b/net/wireguard/receive.c b/net/wireguard/receive.c
new file mode 100644
index 0000000..683c856
--- /dev/null
+++ b/net/wireguard/receive.c
@@ -0,0 +1,490 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#include "queueing.h"
+#include "device.h"
+#include "peer.h"
+#include "timers.h"
+#include "messages.h"
+#include "cookie.h"
+#include "socket.h"
+
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/udp.h>
+#include <net/ip_tunnels.h>
+
+/* Must be called with bh disabled. */
+static inline void rx_stats(struct wireguard_peer *peer, size_t len)
+{
+	struct pcpu_sw_netstats *tstats = get_cpu_ptr(peer->device->dev->tstats);
+
+	u64_stats_update_begin(&tstats->syncp);
+	++tstats->rx_packets;
+	tstats->rx_bytes += len;
+	peer->rx_bytes += len;
+	u64_stats_update_end(&tstats->syncp);
+	put_cpu_ptr(tstats);
+}
+
+#define SKB_TYPE_LE32(skb) (((struct message_header *)(skb)->data)->type)
+
+static inline size_t validate_header_len(struct sk_buff *skb)
+{
+	if (unlikely(skb->len < sizeof(struct message_header)))
+		return 0;
+	if (SKB_TYPE_LE32(skb) == cpu_to_le32(MESSAGE_DATA) && skb->len >= MESSAGE_MINIMUM_LENGTH)
+		return sizeof(struct message_data);
+	if (SKB_TYPE_LE32(skb) == cpu_to_le32(MESSAGE_HANDSHAKE_INITIATION) && skb->len == sizeof(struct message_handshake_initiation))
+		return sizeof(struct message_handshake_initiation);
+	if (SKB_TYPE_LE32(skb) == cpu_to_le32(MESSAGE_HANDSHAKE_RESPONSE) && skb->len == sizeof(struct message_handshake_response))
+		return sizeof(struct message_handshake_response);
+	if (SKB_TYPE_LE32(skb) == cpu_to_le32(MESSAGE_HANDSHAKE_COOKIE) && skb->len == sizeof(struct message_handshake_cookie))
+		return sizeof(struct message_handshake_cookie);
+	return 0;
+}
+
+static inline int skb_prepare_header(struct sk_buff *skb, struct wireguard_device *wg)
+{
+	struct udphdr *udp;
+	size_t data_offset, data_len, header_len;
+
+	if (unlikely(skb_examine_untrusted_ip_hdr(skb) != skb->protocol || skb_transport_header(skb) < skb->head || (skb_transport_header(skb) + sizeof(struct udphdr)) > skb_tail_pointer(skb)))
+		return -EINVAL; /* Bogus IP header */
+	udp = udp_hdr(skb);
+	data_offset = (u8 *)udp - skb->data;
+	if (unlikely(data_offset > U16_MAX || data_offset + sizeof(struct udphdr) > skb->len))
+		return -EINVAL;  /* Packet has offset at impossible location or isn't big enough to have UDP fields*/
+	data_len = ntohs(udp->len);
+	if (unlikely(data_len < sizeof(struct udphdr) || data_len > skb->len - data_offset))
+		return -EINVAL;  /* UDP packet is reporting too small of a size or lying about its size */
+	data_len -= sizeof(struct udphdr);
+	data_offset = (u8 *)udp + sizeof(struct udphdr) - skb->data;
+	if (unlikely(!pskb_may_pull(skb, data_offset + sizeof(struct message_header)) || pskb_trim(skb, data_len + data_offset) < 0))
+		return -EINVAL;
+	skb_pull(skb, data_offset);
+	if (unlikely(skb->len != data_len))
+		return -EINVAL; /* Final len does not agree with calculated len */
+	header_len = validate_header_len(skb);
+	if (unlikely(!header_len))
+		return -EINVAL;
+	__skb_push(skb, data_offset);
+	if (unlikely(!pskb_may_pull(skb, data_offset + header_len)))
+		return -EINVAL;
+	__skb_pull(skb, data_offset);
+	return 0;
+}
+
+static void receive_handshake_packet(struct wireguard_device *wg, struct sk_buff *skb)
+{
+	static u64 last_under_load; /* Yes this is global, so that our load calculation applies to the whole system. */
+	struct wireguard_peer *peer = NULL;
+	bool under_load;
+	enum cookie_mac_state mac_state;
+	bool packet_needs_cookie;
+
+	if (SKB_TYPE_LE32(skb) == cpu_to_le32(MESSAGE_HANDSHAKE_COOKIE)) {
+		net_dbg_skb_ratelimited("%s: Receiving cookie response from %pISpfsc\n", wg->dev->name, skb);
+		cookie_message_consume((struct message_handshake_cookie *)skb->data, wg);
+		return;
+	}
+
+	under_load = skb_queue_len(&wg->incoming_handshakes) >= MAX_QUEUED_INCOMING_HANDSHAKES / 8;
+	if (under_load)
+		last_under_load = get_jiffies_64();
+	else if (last_under_load)
+		under_load = time_is_after_jiffies64(last_under_load + HZ);
+	mac_state = cookie_validate_packet(&wg->cookie_checker, skb, under_load);
+	if ((under_load && mac_state == VALID_MAC_WITH_COOKIE) || (!under_load && mac_state == VALID_MAC_BUT_NO_COOKIE))
+		packet_needs_cookie = false;
+	else if (under_load && mac_state == VALID_MAC_BUT_NO_COOKIE)
+		packet_needs_cookie = true;
+	else {
+		net_dbg_skb_ratelimited("%s: Invalid MAC of handshake, dropping packet from %pISpfsc\n", wg->dev->name, skb);
+		return;
+	}
+
+	switch (SKB_TYPE_LE32(skb)) {
+	case cpu_to_le32(MESSAGE_HANDSHAKE_INITIATION): {
+		struct message_handshake_initiation *message = (struct message_handshake_initiation *)skb->data;
+
+		if (packet_needs_cookie) {
+			packet_send_handshake_cookie(wg, skb, message->sender_index);
+			return;
+		}
+		peer = noise_handshake_consume_initiation(message, wg);
+		if (unlikely(!peer)) {
+			net_dbg_skb_ratelimited("%s: Invalid handshake initiation from %pISpfsc\n", wg->dev->name, skb);
+			return;
+		}
+		socket_set_peer_endpoint_from_skb(peer, skb);
+		net_dbg_ratelimited("%s: Receiving handshake initiation from peer %llu (%pISpfsc)\n", wg->dev->name, peer->internal_id, &peer->endpoint.addr);
+		packet_send_handshake_response(peer);
+		break;
+	}
+	case cpu_to_le32(MESSAGE_HANDSHAKE_RESPONSE): {
+		struct message_handshake_response *message = (struct message_handshake_response *)skb->data;
+
+		if (packet_needs_cookie) {
+			packet_send_handshake_cookie(wg, skb, message->sender_index);
+			return;
+		}
+		peer = noise_handshake_consume_response(message, wg);
+		if (unlikely(!peer)) {
+			net_dbg_skb_ratelimited("%s: Invalid handshake response from %pISpfsc\n", wg->dev->name, skb);
+			return;
+		}
+		socket_set_peer_endpoint_from_skb(peer, skb);
+		net_dbg_ratelimited("%s: Receiving handshake response from peer %llu (%pISpfsc)\n", wg->dev->name, peer->internal_id, &peer->endpoint.addr);
+		if (noise_handshake_begin_session(&peer->handshake, &peer->keypairs)) {
+			timers_session_derived(peer);
+			timers_handshake_complete(peer);
+			/* Calling this function will either send any existing packets in the queue
+			 * and not send a keepalive, which is the best case, Or, if there's nothing
+			 * in the queue, it will send a keepalive, in order to give immediate
+			 * confirmation of the session.
+			 */
+			packet_send_keepalive(peer);
+		}
+		break;
+	}
+	}
+
+	if (unlikely(!peer)) {
+		WARN(1, "Somehow a wrong type of packet wound up in the handshake queue!\n");
+		return;
+	}
+
+	local_bh_disable();
+	rx_stats(peer, skb->len);
+	local_bh_enable();
+
+	timers_any_authenticated_packet_received(peer);
+	timers_any_authenticated_packet_traversal(peer);
+	peer_put(peer);
+}
+
+void packet_handshake_receive_worker(struct work_struct *work)
+{
+	struct wireguard_device *wg = container_of(work, struct multicore_worker, work)->ptr;
+	struct sk_buff *skb;
+
+	while ((skb = skb_dequeue(&wg->incoming_handshakes)) != NULL) {
+		receive_handshake_packet(wg, skb);
+		dev_kfree_skb(skb);
+		cond_resched();
+	}
+}
+
+static inline void keep_key_fresh(struct wireguard_peer *peer)
+{
+	struct noise_keypair *keypair;
+	bool send = false;
+
+	if (peer->sent_lastminute_handshake)
+		return;
+
+	rcu_read_lock_bh();
+	keypair = rcu_dereference_bh(peer->keypairs.current_keypair);
+	if (likely(keypair && keypair->sending.is_valid) && keypair->i_am_the_initiator &&
+	    unlikely(time_is_before_eq_jiffies64(keypair->sending.birthdate + REJECT_AFTER_TIME - KEEPALIVE_TIMEOUT - REKEY_TIMEOUT)))
+		send = true;
+	rcu_read_unlock_bh();
+
+	if (send) {
+		peer->sent_lastminute_handshake = true;
+		packet_send_queued_handshake_initiation(peer, false);
+	}
+}
+
+static inline bool skb_decrypt(struct sk_buff *skb, struct noise_symmetric_key *key, bool have_simd)
+{
+	struct scatterlist sg[MAX_SKB_FRAGS * 2 + 1];
+	struct sk_buff *trailer;
+	unsigned int offset;
+	int num_frags;
+
+	if (unlikely(!key))
+		return false;
+
+	if (unlikely(!key->is_valid || time_is_before_eq_jiffies64(key->birthdate + REJECT_AFTER_TIME) || key->counter.receive.counter >= REJECT_AFTER_MESSAGES)) {
+		key->is_valid = false;
+		return false;
+	}
+
+	PACKET_CB(skb)->nonce = le64_to_cpu(((struct message_data *)skb->data)->counter);
+
+	/* We ensure that the network header is part of the packet before we
+	 * call skb_cow_data, so that there's no chance that data is removed
+	 * from the skb, so that later we can extract the original endpoint.
+	 */
+	offset = skb->data - skb_network_header(skb);
+	skb_push(skb, offset);
+	num_frags = skb_cow_data(skb, 0, &trailer);
+	offset += sizeof(struct message_data);
+	skb_pull(skb, offset);
+	if (unlikely(num_frags < 0 || num_frags > ARRAY_SIZE(sg)))
+		return false;
+
+	sg_init_table(sg, num_frags);
+	if (skb_to_sgvec(skb, sg, 0, skb->len) <= 0)
+		return false;
+
+	if (!chacha20poly1305_decrypt_sg(sg, sg, skb->len, NULL, 0, PACKET_CB(skb)->nonce, key->key, have_simd))
+		return false;
+
+	/* Another ugly situation of pushing and pulling the header so as to
+	 * keep endpoint information intact.
+	 */
+	skb_push(skb, offset);
+	if (pskb_trim(skb, skb->len - noise_encrypted_len(0)))
+		return false;
+	skb_pull(skb, offset);
+
+	return true;
+}
+
+/* This is RFC6479, a replay detection bitmap algorithm that avoids bitshifts */
+static inline bool counter_validate(union noise_counter *counter, u64 their_counter)
+{
+	bool ret = false;
+	unsigned long index, index_current, top, i;
+
+	spin_lock_bh(&counter->receive.lock);
+
+	if (unlikely(counter->receive.counter >= REJECT_AFTER_MESSAGES + 1 || their_counter >= REJECT_AFTER_MESSAGES))
+		goto out;
+
+	++their_counter;
+
+	if (unlikely((COUNTER_WINDOW_SIZE + their_counter) < counter->receive.counter))
+		goto out;
+
+	index = their_counter >> ilog2(BITS_PER_LONG);
+
+	if (likely(their_counter > counter->receive.counter)) {
+		index_current = counter->receive.counter >> ilog2(BITS_PER_LONG);
+		top = min_t(unsigned long, index - index_current, COUNTER_BITS_TOTAL / BITS_PER_LONG);
+		for (i = 1; i <= top; ++i)
+			counter->receive.backtrack[(i + index_current) & ((COUNTER_BITS_TOTAL / BITS_PER_LONG) - 1)] = 0;
+		counter->receive.counter = their_counter;
+	}
+
+	index &= (COUNTER_BITS_TOTAL / BITS_PER_LONG) - 1;
+	ret = !test_and_set_bit(their_counter & (BITS_PER_LONG - 1), &counter->receive.backtrack[index]);
+
+out:
+	spin_unlock_bh(&counter->receive.lock);
+	return ret;
+}
+#include "selftest/counter.h"
+
+static void packet_consume_data_done(struct sk_buff *skb, struct endpoint *endpoint)
+{
+	struct wireguard_peer *peer = PACKET_PEER(skb), *routed_peer;
+	struct net_device *dev = peer->device->dev;
+	unsigned int len, len_before_trim;
+
+	socket_set_peer_endpoint(peer, endpoint);
+
+	if (unlikely(noise_received_with_keypair(&peer->keypairs, PACKET_CB(skb)->keypair))) {
+		timers_handshake_complete(peer);
+		packet_send_staged_packets(peer);
+	}
+
+	keep_key_fresh(peer);
+
+	/* A packet with length 0 is a keepalive packet */
+	if (unlikely(!skb->len)) {
+		rx_stats(peer, message_data_len(0));
+		net_dbg_ratelimited("%s: Receiving keepalive packet from peer %llu (%pISpfsc)\n", dev->name, peer->internal_id, &peer->endpoint.addr);
+		goto packet_processed;
+	}
+
+	if (unlikely(skb_network_header(skb) < skb->head))
+		goto dishonest_packet_size;
+	if (unlikely(!(pskb_network_may_pull(skb, sizeof(struct iphdr)) && (ip_hdr(skb)->version == 4 || (ip_hdr(skb)->version == 6 && pskb_network_may_pull(skb, sizeof(struct ipv6hdr)))))))
+		goto dishonest_packet_type;
+
+	skb->dev = dev;
+	skb->ip_summed = CHECKSUM_UNNECESSARY;
+	skb->protocol = skb_examine_untrusted_ip_hdr(skb);
+	if (skb->protocol == htons(ETH_P_IP)) {
+		len = ntohs(ip_hdr(skb)->tot_len);
+		if (unlikely(len < sizeof(struct iphdr)))
+			goto dishonest_packet_size;
+		if (INET_ECN_is_ce(PACKET_CB(skb)->ds))
+			IP_ECN_set_ce(ip_hdr(skb));
+	} else if (skb->protocol == htons(ETH_P_IPV6)) {
+		len = ntohs(ipv6_hdr(skb)->payload_len) + sizeof(struct ipv6hdr);
+		if (INET_ECN_is_ce(PACKET_CB(skb)->ds))
+			IP6_ECN_set_ce(skb, ipv6_hdr(skb));
+	} else
+		goto dishonest_packet_type;
+
+	if (unlikely(len > skb->len))
+		goto dishonest_packet_size;
+	len_before_trim = skb->len;
+	if (unlikely(pskb_trim(skb, len)))
+		goto packet_processed;
+
+	timers_data_received(peer);
+
+	routed_peer = allowedips_lookup_src(&peer->device->peer_allowedips, skb);
+	peer_put(routed_peer); /* We don't need the extra reference. */
+
+	if (unlikely(routed_peer != peer))
+		goto dishonest_packet_peer;
+
+	if (unlikely(netif_receive_skb(skb) == NET_RX_DROP)) {
+		++dev->stats.rx_dropped;
+		net_dbg_ratelimited("%s: Failed to give packet to userspace from peer %llu (%pISpfsc)\n", dev->name, peer->internal_id, &peer->endpoint.addr);
+	} else
+		rx_stats(peer, message_data_len(len_before_trim));
+	goto continue_processing;
+
+dishonest_packet_peer:
+	net_dbg_skb_ratelimited("%s: Packet has unallowed src IP (%pISc) from peer %llu (%pISpfsc)\n", dev->name, skb, peer->internal_id, &peer->endpoint.addr);
+	++dev->stats.rx_errors;
+	++dev->stats.rx_frame_errors;
+	goto packet_processed;
+dishonest_packet_type:
+	net_dbg_ratelimited("%s: Packet is neither ipv4 nor ipv6 from peer %llu (%pISpfsc)\n", dev->name, peer->internal_id, &peer->endpoint.addr);
+	++dev->stats.rx_errors;
+	++dev->stats.rx_frame_errors;
+	goto packet_processed;
+dishonest_packet_size:
+	net_dbg_ratelimited("%s: Packet has incorrect size from peer %llu (%pISpfsc)\n", dev->name, peer->internal_id, &peer->endpoint.addr);
+	++dev->stats.rx_errors;
+	++dev->stats.rx_length_errors;
+	goto packet_processed;
+packet_processed:
+	dev_kfree_skb(skb);
+continue_processing:
+	timers_any_authenticated_packet_received(peer);
+	timers_any_authenticated_packet_traversal(peer);
+}
+
+void packet_rx_worker(struct work_struct *work)
+{
+	struct crypt_queue *queue = container_of(work, struct crypt_queue, work);
+	struct wireguard_peer *peer;
+	struct noise_keypair *keypair;
+	struct sk_buff *skb;
+	struct endpoint endpoint;
+	enum packet_state state;
+	bool free;
+
+	local_bh_disable();
+	spin_lock_bh(&queue->ring.consumer_lock);
+	while ((skb = __ptr_ring_peek(&queue->ring)) != NULL && (state = atomic_read(&PACKET_CB(skb)->state)) != PACKET_STATE_UNCRYPTED) {
+		__ptr_ring_discard_one(&queue->ring);
+		peer = PACKET_PEER(skb);
+		keypair = PACKET_CB(skb)->keypair;
+		free = true;
+
+		if (unlikely(state != PACKET_STATE_CRYPTED))
+			goto next;
+
+		if (unlikely(!counter_validate(&keypair->receiving.counter, PACKET_CB(skb)->nonce))) {
+			net_dbg_ratelimited("%s: Packet has invalid nonce %llu (max %llu)\n", peer->device->dev->name, PACKET_CB(skb)->nonce, keypair->receiving.counter.receive.counter);
+			goto next;
+		}
+
+		if (unlikely(socket_endpoint_from_skb(&endpoint, skb)))
+			goto next;
+
+		skb_reset(skb);
+		packet_consume_data_done(skb, &endpoint);
+		free = false;
+
+next:
+		noise_keypair_put(keypair);
+		peer_put(peer);
+		if (unlikely(free))
+			dev_kfree_skb(skb);
+	}
+	spin_unlock_bh(&queue->ring.consumer_lock);
+	local_bh_enable();
+}
+
+void packet_decrypt_worker(struct work_struct *work)
+{
+	struct crypt_queue *queue = container_of(work, struct multicore_worker, work)->ptr;
+	struct sk_buff *skb;
+	bool have_simd = chacha20poly1305_init_simd();
+
+	while ((skb = ptr_ring_consume_bh(&queue->ring)) != NULL) {
+		enum packet_state state = likely(skb_decrypt(skb, &PACKET_CB(skb)->keypair->receiving, have_simd)) ? PACKET_STATE_CRYPTED : PACKET_STATE_DEAD;
+
+		queue_enqueue_per_peer(&PACKET_PEER(skb)->rx_queue, skb, state);
+	}
+
+	chacha20poly1305_deinit_simd(have_simd);
+}
+
+static void packet_consume_data(struct wireguard_device *wg, struct sk_buff *skb)
+{
+	struct wireguard_peer *peer;
+	__le32 idx = ((struct message_data *)skb->data)->key_idx;
+	int ret;
+
+	rcu_read_lock_bh();
+	PACKET_CB(skb)->keypair = noise_keypair_get((struct noise_keypair *)index_hashtable_lookup(&wg->index_hashtable, INDEX_HASHTABLE_KEYPAIR, idx));
+	rcu_read_unlock_bh();
+	if (unlikely(!PACKET_CB(skb)->keypair)) {
+		dev_kfree_skb(skb);
+		return;
+	}
+
+	/* The call to index_hashtable_lookup gives us a reference to its underlying peer, so we don't need to call peer_rcu_get(). */
+	peer = PACKET_PEER(skb);
+
+	ret = queue_enqueue_per_device_and_peer(&wg->decrypt_queue, &peer->rx_queue, skb, wg->packet_crypt_wq, &wg->decrypt_queue.last_cpu);
+	if (likely(!ret))
+		return; /* Successful. No need to drop references below. */
+
+	if (ret == -EPIPE)
+		queue_enqueue_per_peer(&peer->rx_queue, skb, PACKET_STATE_DEAD);
+	else {
+		peer_put(peer);
+		noise_keypair_put(PACKET_CB(skb)->keypair);
+		dev_kfree_skb(skb);
+	}
+}
+
+void packet_receive(struct wireguard_device *wg, struct sk_buff *skb)
+{
+	if (unlikely(skb_prepare_header(skb, wg) < 0))
+		goto err;
+	switch (SKB_TYPE_LE32(skb)) {
+	case cpu_to_le32(MESSAGE_HANDSHAKE_INITIATION):
+	case cpu_to_le32(MESSAGE_HANDSHAKE_RESPONSE):
+	case cpu_to_le32(MESSAGE_HANDSHAKE_COOKIE): {
+		int cpu;
+
+		if (skb_queue_len(&wg->incoming_handshakes) > MAX_QUEUED_INCOMING_HANDSHAKES) {
+			net_dbg_skb_ratelimited("%s: Too many handshakes queued, dropping packet from %pISpfsc\n", wg->dev->name, skb);
+			goto err;
+		}
+		skb_queue_tail(&wg->incoming_handshakes, skb);
+		/* Queues up a call to packet_process_queued_handshake_packets(skb): */
+		cpu = cpumask_next_online(&wg->incoming_handshake_cpu);
+		queue_work_on(cpu, wg->handshake_receive_wq, &per_cpu_ptr(wg->incoming_handshakes_worker, cpu)->work);
+		break;
+	}
+	case cpu_to_le32(MESSAGE_DATA):
+		PACKET_CB(skb)->ds = ip_tunnel_get_dsfield(ip_hdr(skb), skb);
+		packet_consume_data(wg, skb);
+		break;
+	default:
+		net_dbg_skb_ratelimited("%s: Invalid packet from %pISpfsc\n", wg->dev->name, skb);
+		goto err;
+	}
+	return;
+
+err:
+	dev_kfree_skb(skb);
+}
diff --git b/net/wireguard/selftest/allowedips.h b/net/wireguard/selftest/allowedips.h
new file mode 100644
index 0000000..24122b4
--- /dev/null
+++ b/net/wireguard/selftest/allowedips.h
@@ -0,0 +1,514 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#ifdef DEBUG
+
+#ifdef DEBUG_PRINT_TRIE_GRAPHVIZ
+#include <linux/siphash.h>
+static __init void print_node(struct allowedips_node *node, u8 bits)
+{
+	u32 color = 0;
+	char *style = "dotted";
+	char *fmt_connection = KERN_DEBUG "\t\"%p/%d\" -> \"%p/%d\";\n";
+	char *fmt_declaration = KERN_DEBUG "\t\"%p/%d\"[style=%s, color=\"#%06x\"];\n";
+	if (bits == 32) {
+		fmt_connection = KERN_DEBUG "\t\"%pI4/%d\" -> \"%pI4/%d\";\n";
+		fmt_declaration = KERN_DEBUG "\t\"%pI4/%d\"[style=%s, color=\"#%06x\"];\n";
+	} else if (bits == 128) {
+		fmt_connection = KERN_DEBUG "\t\"%pI6/%d\" -> \"%pI6/%d\";\n";
+		fmt_declaration = KERN_DEBUG "\t\"%pI6/%d\"[style=%s, color=\"#%06x\"];\n";
+	}
+	if (node->peer) {
+		hsiphash_key_t key = { 0 };
+		memcpy(&key, &node->peer, sizeof(node->peer));
+		color = hsiphash_1u32(0xdeadbeef, &key) % 200 << 16 | hsiphash_1u32(0xbabecafe, &key) % 200 << 8 | hsiphash_1u32(0xabad1dea, &key) % 200;
+		style = "bold";
+	}
+	printk(fmt_declaration, node->bits, node->cidr, style, color);
+	if (node->bit[0]) {
+		printk(fmt_connection, node->bits, node->cidr, node->bit[0]->bits, node->bit[0]->cidr);
+		print_node(node->bit[0], bits);
+	}
+	if (node->bit[1]) {
+		printk(fmt_connection, node->bits, node->cidr, node->bit[1]->bits, node->bit[1]->cidr);
+		print_node(node->bit[1], bits);
+	}
+}
+static __init void print_tree(struct allowedips_node *top, u8 bits)
+{
+	printk(KERN_DEBUG "digraph trie {\n");
+	print_node(top, bits);
+	printk(KERN_DEBUG "}\n");
+}
+#endif
+
+#ifdef DEBUG_RANDOM_TRIE
+#define NUM_PEERS 2000
+#define NUM_RAND_ROUTES 400
+#define NUM_MUTATED_ROUTES 100
+#define NUM_QUERIES (NUM_RAND_ROUTES * NUM_MUTATED_ROUTES * 30)
+#include <linux/random.h>
+struct horrible_allowedips {
+	struct hlist_head head;
+};
+struct horrible_allowedips_node {
+	struct hlist_node table;
+	union nf_inet_addr ip;
+	union nf_inet_addr mask;
+	uint8_t ip_version;
+	void *value;
+};
+static __init void horrible_allowedips_init(struct horrible_allowedips *table)
+{
+	INIT_HLIST_HEAD(&table->head);
+}
+static __init void horrible_allowedips_free(struct horrible_allowedips *table)
+{
+	struct hlist_node *h;
+	struct horrible_allowedips_node *node;
+	hlist_for_each_entry_safe(node, h, &table->head, table) {
+		hlist_del(&node->table);
+		kfree(node);
+	}
+}
+static __init inline union nf_inet_addr horrible_cidr_to_mask(uint8_t cidr)
+{
+	union nf_inet_addr mask;
+	memset(&mask, 0x00, 128 / 8);
+	memset(&mask, 0xff, cidr / 8);
+	if (cidr % 32)
+		mask.all[cidr / 32] = htonl((0xFFFFFFFFUL << (32 - (cidr % 32))) & 0xFFFFFFFFUL);
+	return mask;
+}
+static __init inline uint8_t horrible_mask_to_cidr(union nf_inet_addr subnet)
+{
+	return hweight32(subnet.all[0])
+	     + hweight32(subnet.all[1])
+	     + hweight32(subnet.all[2])
+	     + hweight32(subnet.all[3]);
+}
+static __init inline void horrible_mask_self(struct horrible_allowedips_node *node)
+{
+	if (node->ip_version == 4)
+		node->ip.ip &= node->mask.ip;
+	else if (node->ip_version == 6) {
+		node->ip.ip6[0] &= node->mask.ip6[0];
+		node->ip.ip6[1] &= node->mask.ip6[1];
+		node->ip.ip6[2] &= node->mask.ip6[2];
+		node->ip.ip6[3] &= node->mask.ip6[3];
+	}
+}
+static __init inline bool horrible_match_v4(const struct horrible_allowedips_node *node, struct in_addr *ip)
+{
+	return (ip->s_addr & node->mask.ip) == node->ip.ip;
+}
+static __init inline bool horrible_match_v6(const struct horrible_allowedips_node *node, struct in6_addr *ip)
+{
+	return	(ip->in6_u.u6_addr32[0] & node->mask.ip6[0]) == node->ip.ip6[0] &&
+		(ip->in6_u.u6_addr32[1] & node->mask.ip6[1]) == node->ip.ip6[1] &&
+		(ip->in6_u.u6_addr32[2] & node->mask.ip6[2]) == node->ip.ip6[2] &&
+		(ip->in6_u.u6_addr32[3] & node->mask.ip6[3]) == node->ip.ip6[3];
+}
+static __init void horrible_insert_ordered(struct horrible_allowedips *table, struct horrible_allowedips_node *node)
+{
+	struct horrible_allowedips_node *other = NULL, *where = NULL;
+	uint8_t my_cidr = horrible_mask_to_cidr(node->mask);
+	hlist_for_each_entry(other, &table->head, table) {
+		if (!memcmp(&other->mask, &node->mask, sizeof(union nf_inet_addr)) &&
+		    !memcmp(&other->ip, &node->ip, sizeof(union nf_inet_addr)) &&
+		    other->ip_version == node->ip_version) {
+			other->value = node->value;
+			kfree(node);
+			return;
+		}
+		where = other;
+		if (horrible_mask_to_cidr(other->mask) <= my_cidr)
+			break;
+	}
+	if (!other && !where)
+		hlist_add_head(&node->table, &table->head);
+	else if (!other)
+		hlist_add_behind(&node->table, &where->table);
+	else
+		hlist_add_before(&node->table, &where->table);
+}
+static __init int horrible_allowedips_insert_v4(struct horrible_allowedips *table, struct in_addr *ip, uint8_t cidr, void *value)
+{
+	struct horrible_allowedips_node *node = kzalloc(sizeof(struct horrible_allowedips_node), GFP_KERNEL);
+	if (!node)
+		return -ENOMEM;
+	node->ip.in = *ip;
+	node->mask = horrible_cidr_to_mask(cidr);
+	node->ip_version = 4;
+	node->value = value;
+	horrible_mask_self(node);
+	horrible_insert_ordered(table, node);
+	return 0;
+}
+static __init int horrible_allowedips_insert_v6(struct horrible_allowedips *table, struct in6_addr *ip, uint8_t cidr, void *value)
+{
+	struct horrible_allowedips_node *node = kzalloc(sizeof(struct horrible_allowedips_node), GFP_KERNEL);
+	if (!node)
+		return -ENOMEM;
+	node->ip.in6 = *ip;
+	node->mask = horrible_cidr_to_mask(cidr);
+	node->ip_version = 6;
+	node->value = value;
+	horrible_mask_self(node);
+	horrible_insert_ordered(table, node);
+	return 0;
+}
+static __init void *horrible_allowedips_lookup_v4(struct horrible_allowedips *table, struct in_addr *ip)
+{
+	struct horrible_allowedips_node *node;
+	void *ret = NULL;
+	hlist_for_each_entry(node, &table->head, table) {
+		if (node->ip_version != 4)
+			continue;
+		if (horrible_match_v4(node, ip)) {
+			ret = node->value;
+			break;
+		}
+	}
+	return ret;
+}
+static __init void *horrible_allowedips_lookup_v6(struct horrible_allowedips *table, struct in6_addr *ip)
+{
+	struct horrible_allowedips_node *node;
+	void *ret = NULL;
+	hlist_for_each_entry(node, &table->head, table) {
+		if (node->ip_version != 6)
+			continue;
+		if (horrible_match_v6(node, ip)) {
+			ret = node->value;
+			break;
+		}
+	}
+	return ret;
+}
+
+static __init bool randomized_test(void)
+{
+	DEFINE_MUTEX(mutex);
+	bool ret = false;
+	unsigned int i, j, k, mutate_amount, cidr;
+	struct wireguard_peer **peers, *peer;
+	struct allowedips t;
+	struct horrible_allowedips h;
+	u8 ip[16], mutate_mask[16], mutated[16];
+
+	allowedips_init(&t);
+	horrible_allowedips_init(&h);
+
+	peers = kcalloc(NUM_PEERS, sizeof(struct wireguard_peer *), GFP_KERNEL);
+	if (!peers) {
+		pr_info("allowedips random self-test: out of memory\n");
+		goto free;
+	}
+	for (i = 0; i < NUM_PEERS; ++i) {
+		peers[i] = kzalloc(sizeof(struct wireguard_peer), GFP_KERNEL);
+		if (!peers[i]) {
+			pr_info("allowedips random self-test: out of memory\n");
+			goto free;
+		}
+		kref_init(&peers[i]->refcount);
+	}
+
+	for (i = 0; i < NUM_RAND_ROUTES; ++i) {
+		prandom_bytes(ip, 4);
+		cidr = prandom_u32_max(32) + 1;
+		peer = peers[prandom_u32_max(NUM_PEERS)];
+		if (allowedips_insert_v4(&t, (struct in_addr *)ip, cidr, peer, &mutex) < 0) {
+			pr_info("allowedips random self-test: out of memory\n");
+			goto free;
+		}
+		if (horrible_allowedips_insert_v4(&h, (struct in_addr *)ip, cidr, peer) < 0) {
+			pr_info("allowedips random self-test: out of memory\n");
+			goto free;
+		}
+		for (j = 0; j < NUM_MUTATED_ROUTES; ++j) {
+			memcpy(mutated, ip, 4);
+			prandom_bytes(mutate_mask, 4);
+			mutate_amount = prandom_u32_max(32);
+			for (k = 0; k < mutate_amount / 8; ++k)
+				mutate_mask[k] = 0xff;
+			mutate_mask[k] = 0xff << ((8 - (mutate_amount % 8)) % 8);
+			for (; k < 4; ++k)
+				mutate_mask[k] = 0;
+			for (k = 0; k < 4; ++k)
+				mutated[k] = (mutated[k] & mutate_mask[k]) | (~mutate_mask[k] & prandom_u32_max(256));
+			cidr = prandom_u32_max(32) + 1;
+			peer = peers[prandom_u32_max(NUM_PEERS)];
+			if (allowedips_insert_v4(&t, (struct in_addr *)mutated, cidr, peer, &mutex) < 0) {
+				pr_info("allowedips random self-test: out of memory\n");
+				goto free;
+			}
+			if (horrible_allowedips_insert_v4(&h, (struct in_addr *)mutated, cidr, peer)) {
+				pr_info("allowedips random self-test: out of memory\n");
+				goto free;
+			}
+		}
+	}
+
+	for (i = 0; i < NUM_RAND_ROUTES; ++i) {
+		prandom_bytes(ip, 16);
+		cidr = prandom_u32_max(128) + 1;
+		peer = peers[prandom_u32_max(NUM_PEERS)];
+		if (allowedips_insert_v6(&t, (struct in6_addr *)ip, cidr, peer, &mutex) < 0) {
+			pr_info("allowedips random self-test: out of memory\n");
+			goto free;
+		}
+		if (horrible_allowedips_insert_v6(&h, (struct in6_addr *)ip, cidr, peer) < 0) {
+			pr_info("allowedips random self-test: out of memory\n");
+			goto free;
+		}
+		for (j = 0; j < NUM_MUTATED_ROUTES; ++j) {
+			memcpy(mutated, ip, 16);
+			prandom_bytes(mutate_mask, 16);
+			mutate_amount = prandom_u32_max(128);
+			for (k = 0; k < mutate_amount / 8; ++k)
+				mutate_mask[k] = 0xff;
+			mutate_mask[k] = 0xff << ((8 - (mutate_amount % 8)) % 8);
+			for (; k < 4; ++k)
+				mutate_mask[k] = 0;
+			for (k = 0; k < 4; ++k)
+				mutated[k] = (mutated[k] & mutate_mask[k]) | (~mutate_mask[k] & prandom_u32_max(256));
+			cidr = prandom_u32_max(128) + 1;
+			peer = peers[prandom_u32_max(NUM_PEERS)];
+			if (allowedips_insert_v6(&t, (struct in6_addr *)mutated, cidr, peer, &mutex) < 0) {
+				pr_info("allowedips random self-test: out of memory\n");
+				goto free;
+			}
+			if (horrible_allowedips_insert_v6(&h, (struct in6_addr *)mutated, cidr, peer)) {
+				pr_info("allowedips random self-test: out of memory\n");
+				goto free;
+			}
+		}
+	}
+
+#ifdef DEBUG_PRINT_TRIE_GRAPHVIZ
+	print_tree(t.root4, 32);
+	print_tree(t.root6, 128);
+#endif
+
+	for (i = 0; i < NUM_QUERIES; ++i) {
+		prandom_bytes(ip, 4);
+		if (lookup(t.root4, 32, ip) != horrible_allowedips_lookup_v4(&h, (struct in_addr *)ip)) {
+			pr_info("allowedips random self-test: FAIL\n");
+			goto free;
+		}
+	}
+
+	for (i = 0; i < NUM_QUERIES; ++i) {
+		prandom_bytes(ip, 16);
+		if (lookup(t.root6, 128, ip) != horrible_allowedips_lookup_v6(&h, (struct in6_addr *)ip)) {
+			pr_info("allowedips random self-test: FAIL\n");
+			goto free;
+		}
+	}
+	ret = true;
+
+free:
+	allowedips_free(&t, &mutex);
+	horrible_allowedips_free(&h);
+	if (peers) {
+		for (i = 0; i < NUM_PEERS; ++i)
+			kfree(peers[i]);
+	}
+	kfree(peers);
+	return ret;
+}
+#endif
+
+static __init inline struct in_addr *ip4(u8 a, u8 b, u8 c, u8 d)
+{
+	static struct in_addr ip;
+	u8 *split = (u8 *)&ip;
+	split[0] = a;
+	split[1] = b;
+	split[2] = c;
+	split[3] = d;
+	return &ip;
+}
+static __init inline struct in6_addr *ip6(u32 a, u32 b, u32 c, u32 d)
+{
+	static struct in6_addr ip;
+	__be32 *split = (__be32 *)&ip;
+	split[0] = cpu_to_be32(a);
+	split[1] = cpu_to_be32(b);
+	split[2] = cpu_to_be32(c);
+	split[3] = cpu_to_be32(d);
+	return &ip;
+}
+
+#define init_peer(name) do { \
+	name = kzalloc(sizeof(struct wireguard_peer), GFP_KERNEL); \
+	if (!name) { \
+		pr_info("allowedips self-test: out of memory\n"); \
+		goto free; \
+	} \
+	kref_init(&name->refcount); \
+} while (0)
+
+#define insert(version, mem, ipa, ipb, ipc, ipd, cidr) \
+	allowedips_insert_v##version(&t, ip##version(ipa, ipb, ipc, ipd), cidr, mem, &mutex)
+
+#define maybe_fail \
+	++i; \
+	if (!_s) { \
+		pr_info("allowedips self-test %zu: FAIL\n", i); \
+		success = false; \
+	}
+
+#define test(version, mem, ipa, ipb, ipc, ipd) do { \
+	bool _s = lookup(t.root##version, version == 4 ? 32 : 128, ip##version(ipa, ipb, ipc, ipd)) == mem; \
+	maybe_fail \
+} while (0)
+
+#define test_negative(version, mem, ipa, ipb, ipc, ipd) do { \
+	bool _s = lookup(t.root##version, version == 4 ? 32 : 128, ip##version(ipa, ipb, ipc, ipd)) != mem; \
+	maybe_fail \
+} while (0)
+
+bool __init allowedips_selftest(void)
+{
+	DEFINE_MUTEX(mutex);
+	struct allowedips t;
+	struct wireguard_peer *a = NULL, *b = NULL, *c = NULL, *d = NULL, *e = NULL, *f = NULL, *g = NULL, *h = NULL;
+	size_t i = 0;
+	bool success = false;
+	struct in6_addr ip;
+	__be64 part;
+
+	mutex_init(&mutex);
+
+	mutex_lock(&mutex);
+
+	allowedips_init(&t);
+	init_peer(a);
+	init_peer(b);
+	init_peer(c);
+	init_peer(d);
+	init_peer(e);
+	init_peer(f);
+	init_peer(g);
+	init_peer(h);
+
+	insert(4, a, 192, 168, 4, 0, 24);
+	insert(4, b, 192, 168, 4, 4, 32);
+	insert(4, c, 192, 168, 0, 0, 16);
+	insert(4, d, 192, 95, 5, 64, 27);
+	insert(4, c, 192, 95, 5, 65, 27); /* replaces previous entry, and maskself is required */
+	insert(6, d, 0x26075300, 0x60006b00, 0, 0xc05f0543, 128);
+	insert(6, c, 0x26075300, 0x60006b00, 0, 0, 64);
+	insert(4, e, 0, 0, 0, 0, 0);
+	insert(6, e, 0, 0, 0, 0, 0);
+	insert(6, f, 0, 0, 0, 0, 0); /* replaces previous entry */
+	insert(6, g, 0x24046800, 0, 0, 0, 32);
+	insert(6, h, 0x24046800, 0x40040800, 0xdeadbeef, 0xdeadbeef, 64); /* maskself is required */
+	insert(6, a, 0x24046800, 0x40040800, 0xdeadbeef, 0xdeadbeef, 128);
+	insert(6, c, 0x24446800, 0x40e40800, 0xdeaebeef, 0xdefbeef, 128);
+	insert(6, b, 0x24446800, 0xf0e40800, 0xeeaebeef, 0, 98);
+	insert(4, g, 64, 15, 112, 0, 20);
+	insert(4, h, 64, 15, 123, 211, 25); /* maskself is required */
+	insert(4, a, 10, 0, 0, 0, 25);
+	insert(4, b, 10, 0, 0, 128, 25);
+	insert(4, a, 10, 1, 0, 0, 30);
+	insert(4, b, 10, 1, 0, 4, 30);
+	insert(4, c, 10, 1, 0, 8, 29);
+	insert(4, d, 10, 1, 0, 16, 29);
+
+#ifdef DEBUG_PRINT_TRIE_GRAPHVIZ
+	print_tree(t.root4, 32);
+	print_tree(t.root6, 128);
+#endif
+
+	success = true;
+
+	test(4, a, 192, 168, 4, 20);
+	test(4, a, 192, 168, 4, 0);
+	test(4, b, 192, 168, 4, 4);
+	test(4, c, 192, 168, 200, 182);
+	test(4, c, 192, 95, 5, 68);
+	test(4, e, 192, 95, 5, 96);
+	test(6, d, 0x26075300, 0x60006b00, 0, 0xc05f0543);
+	test(6, c, 0x26075300, 0x60006b00, 0, 0xc02e01ee);
+	test(6, f, 0x26075300, 0x60006b01, 0, 0);
+	test(6, g, 0x24046800, 0x40040806, 0, 0x1006);
+	test(6, g, 0x24046800, 0x40040806, 0x1234, 0x5678);
+	test(6, f, 0x240467ff, 0x40040806, 0x1234, 0x5678);
+	test(6, f, 0x24046801, 0x40040806, 0x1234, 0x5678);
+	test(6, h, 0x24046800, 0x40040800, 0x1234, 0x5678);
+	test(6, h, 0x24046800, 0x40040800, 0, 0);
+	test(6, h, 0x24046800, 0x40040800, 0x10101010, 0x10101010);
+	test(6, a, 0x24046800, 0x40040800, 0xdeadbeef, 0xdeadbeef);
+	test(4, g, 64, 15, 116, 26);
+	test(4, g, 64, 15, 127, 3);
+	test(4, g, 64, 15, 123, 1);
+	test(4, h, 64, 15, 123, 128);
+	test(4, h, 64, 15, 123, 129);
+	test(4, a, 10, 0, 0, 52);
+	test(4, b, 10, 0, 0, 220);
+	test(4, a, 10, 1, 0, 2);
+	test(4, b, 10, 1, 0, 6);
+	test(4, c, 10, 1, 0, 10);
+	test(4, d, 10, 1, 0, 20);
+
+	insert(4, a, 1, 0, 0, 0, 32);
+	insert(4, a, 64, 0, 0, 0, 32);
+	insert(4, a, 128, 0, 0, 0, 32);
+	insert(4, a, 192, 0, 0, 0, 32);
+	insert(4, a, 255, 0, 0, 0, 32);
+	allowedips_remove_by_peer(&t, a, &mutex);
+	test_negative(4, a, 1, 0, 0, 0);
+	test_negative(4, a, 64, 0, 0, 0);
+	test_negative(4, a, 128, 0, 0, 0);
+	test_negative(4, a, 192, 0, 0, 0);
+	test_negative(4, a, 255, 0, 0, 0);
+
+	allowedips_free(&t, &mutex);
+	allowedips_init(&t);
+	insert(4, a, 192, 168, 0, 0, 16);
+	insert(4, a, 192, 168, 0, 0, 24);
+	allowedips_remove_by_peer(&t, a, &mutex);
+	test_negative(4, a, 192, 168, 0, 1);
+
+	/* These will hit the BUG_ON(len >= 128) in free_node if something goes wrong. */
+	for (i = 0; i < 128; ++i) {
+		part = cpu_to_be64(~(1LLU << (i % 64)));
+		memset(&ip, 0xff, 16);
+		memcpy((u8 *)&ip + (i < 64) * 8, &part, 8);
+		allowedips_insert_v6(&t, &ip, 128, a, &mutex);
+	}
+
+#ifdef DEBUG_RANDOM_TRIE
+	if (success)
+		success = randomized_test();
+#endif
+
+	if (success)
+		pr_info("allowedips self-tests: pass\n");
+
+free:
+	allowedips_free(&t, &mutex);
+	kfree(a);
+	kfree(b);
+	kfree(c);
+	kfree(d);
+	kfree(e);
+	kfree(f);
+	kfree(g);
+	kfree(h);
+	mutex_unlock(&mutex);
+
+	return success;
+}
+#undef test_negative
+#undef test
+#undef remove
+#undef insert
+#undef init_peer
+
+#endif
diff --git b/net/wireguard/selftest/blake2s.h b/net/wireguard/selftest/blake2s.h
new file mode 100644
index 0000000..601f016
--- /dev/null
+++ b/net/wireguard/selftest/blake2s.h
@@ -0,0 +1,559 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#ifdef DEBUG
+static const u8 blake2s_testvecs[][BLAKE2S_OUTBYTES] __initconst = {
+	{ 0x69, 0x21, 0x7A, 0x30, 0x79, 0x90, 0x80, 0x94, 0xE1, 0x11, 0x21, 0xD0, 0x42, 0x35, 0x4A, 0x7C, 0x1F, 0x55, 0xB6, 0x48, 0x2C, 0xA1, 0xA5, 0x1E, 0x1B, 0x25, 0x0D, 0xFD, 0x1E, 0xD0, 0xEE, 0xF9 },
+	{ 0xE3, 0x4D, 0x74, 0xDB, 0xAF, 0x4F, 0xF4, 0xC6, 0xAB, 0xD8, 0x71, 0xCC, 0x22, 0x04, 0x51, 0xD2, 0xEA, 0x26, 0x48, 0x84, 0x6C, 0x77, 0x57, 0xFB, 0xAA, 0xC8, 0x2F, 0xE5, 0x1A, 0xD6, 0x4B, 0xEA },
+	{ 0xDD, 0xAD, 0x9A, 0xB1, 0x5D, 0xAC, 0x45, 0x49, 0xBA, 0x42, 0xF4, 0x9D, 0x26, 0x24, 0x96, 0xBE, 0xF6, 0xC0, 0xBA, 0xE1, 0xDD, 0x34, 0x2A, 0x88, 0x08, 0xF8, 0xEA, 0x26, 0x7C, 0x6E, 0x21, 0x0C },
+	{ 0xE8, 0xF9, 0x1C, 0x6E, 0xF2, 0x32, 0xA0, 0x41, 0x45, 0x2A, 0xB0, 0xE1, 0x49, 0x07, 0x0C, 0xDD, 0x7D, 0xD1, 0x76, 0x9E, 0x75, 0xB3, 0xA5, 0x92, 0x1B, 0xE3, 0x78, 0x76, 0xC4, 0x5C, 0x99, 0x00 },
+	{ 0x0C, 0xC7, 0x0E, 0x00, 0x34, 0x8B, 0x86, 0xBA, 0x29, 0x44, 0xD0, 0xC3, 0x20, 0x38, 0xB2, 0x5C, 0x55, 0x58, 0x4F, 0x90, 0xDF, 0x23, 0x04, 0xF5, 0x5F, 0xA3, 0x32, 0xAF, 0x5F, 0xB0, 0x1E, 0x20 },
+	{ 0xEC, 0x19, 0x64, 0x19, 0x10, 0x87, 0xA4, 0xFE, 0x9D, 0xF1, 0xC7, 0x95, 0x34, 0x2A, 0x02, 0xFF, 0xC1, 0x91, 0xA5, 0xB2, 0x51, 0x76, 0x48, 0x56, 0xAE, 0x5B, 0x8B, 0x57, 0x69, 0xF0, 0xC6, 0xCD },
+	{ 0xE1, 0xFA, 0x51, 0x61, 0x8D, 0x7D, 0xF4, 0xEB, 0x70, 0xCF, 0x0D, 0x5A, 0x9E, 0x90, 0x6F, 0x80, 0x6E, 0x9D, 0x19, 0xF7, 0xF4, 0xF0, 0x1E, 0x3B, 0x62, 0x12, 0x88, 0xE4, 0x12, 0x04, 0x05, 0xD6 },
+	{ 0x59, 0x80, 0x01, 0xFA, 0xFB, 0xE8, 0xF9, 0x4E, 0xC6, 0x6D, 0xC8, 0x27, 0xD0, 0x12, 0xCF, 0xCB, 0xBA, 0x22, 0x28, 0x56, 0x9F, 0x44, 0x8E, 0x89, 0xEA, 0x22, 0x08, 0xC8, 0xBF, 0x76, 0x92, 0x93 },
+	{ 0xC7, 0xE8, 0x87, 0xB5, 0x46, 0x62, 0x36, 0x35, 0xE9, 0x3E, 0x04, 0x95, 0x59, 0x8F, 0x17, 0x26, 0x82, 0x19, 0x96, 0xC2, 0x37, 0x77, 0x05, 0xB9, 0x3A, 0x1F, 0x63, 0x6F, 0x87, 0x2B, 0xFA, 0x2D },
+	{ 0xC3, 0x15, 0xA4, 0x37, 0xDD, 0x28, 0x06, 0x2A, 0x77, 0x0D, 0x48, 0x19, 0x67, 0x13, 0x6B, 0x1B, 0x5E, 0xB8, 0x8B, 0x21, 0xEE, 0x53, 0xD0, 0x32, 0x9C, 0x58, 0x97, 0x12, 0x6E, 0x9D, 0xB0, 0x2C },
+	{ 0xBB, 0x47, 0x3D, 0xED, 0xDC, 0x05, 0x5F, 0xEA, 0x62, 0x28, 0xF2, 0x07, 0xDA, 0x57, 0x53, 0x47, 0xBB, 0x00, 0x40, 0x4C, 0xD3, 0x49, 0xD3, 0x8C, 0x18, 0x02, 0x63, 0x07, 0xA2, 0x24, 0xCB, 0xFF },
+	{ 0x68, 0x7E, 0x18, 0x73, 0xA8, 0x27, 0x75, 0x91, 0xBB, 0x33, 0xD9, 0xAD, 0xF9, 0xA1, 0x39, 0x12, 0xEF, 0xEF, 0xE5, 0x57, 0xCA, 0xFC, 0x39, 0xA7, 0x95, 0x26, 0x23, 0xE4, 0x72, 0x55, 0xF1, 0x6D },
+	{ 0x1A, 0xC7, 0xBA, 0x75, 0x4D, 0x6E, 0x2F, 0x94, 0xE0, 0xE8, 0x6C, 0x46, 0xBF, 0xB2, 0x62, 0xAB, 0xBB, 0x74, 0xF4, 0x50, 0xEF, 0x45, 0x6D, 0x6B, 0x4D, 0x97, 0xAA, 0x80, 0xCE, 0x6D, 0xA7, 0x67 },
+	{ 0x01, 0x2C, 0x97, 0x80, 0x96, 0x14, 0x81, 0x6B, 0x5D, 0x94, 0x94, 0x47, 0x7D, 0x4B, 0x68, 0x7D, 0x15, 0xB9, 0x6E, 0xB6, 0x9C, 0x0E, 0x80, 0x74, 0xA8, 0x51, 0x6F, 0x31, 0x22, 0x4B, 0x5C, 0x98 },
+	{ 0x91, 0xFF, 0xD2, 0x6C, 0xFA, 0x4D, 0xA5, 0x13, 0x4C, 0x7E, 0xA2, 0x62, 0xF7, 0x88, 0x9C, 0x32, 0x9F, 0x61, 0xF6, 0xA6, 0x57, 0x22, 0x5C, 0xC2, 0x12, 0xF4, 0x00, 0x56, 0xD9, 0x86, 0xB3, 0xF4 },
+	{ 0xD9, 0x7C, 0x82, 0x8D, 0x81, 0x82, 0xA7, 0x21, 0x80, 0xA0, 0x6A, 0x78, 0x26, 0x83, 0x30, 0x67, 0x3F, 0x7C, 0x4E, 0x06, 0x35, 0x94, 0x7C, 0x04, 0xC0, 0x23, 0x23, 0xFD, 0x45, 0xC0, 0xA5, 0x2D },
+	{ 0xEF, 0xC0, 0x4C, 0xDC, 0x39, 0x1C, 0x7E, 0x91, 0x19, 0xBD, 0x38, 0x66, 0x8A, 0x53, 0x4E, 0x65, 0xFE, 0x31, 0x03, 0x6D, 0x6A, 0x62, 0x11, 0x2E, 0x44, 0xEB, 0xEB, 0x11, 0xF9, 0xC5, 0x70, 0x80 },
+	{ 0x99, 0x2C, 0xF5, 0xC0, 0x53, 0x44, 0x2A, 0x5F, 0xBC, 0x4F, 0xAF, 0x58, 0x3E, 0x04, 0xE5, 0x0B, 0xB7, 0x0D, 0x2F, 0x39, 0xFB, 0xB6, 0xA5, 0x03, 0xF8, 0x9E, 0x56, 0xA6, 0x3E, 0x18, 0x57, 0x8A },
+	{ 0x38, 0x64, 0x0E, 0x9F, 0x21, 0x98, 0x3E, 0x67, 0xB5, 0x39, 0xCA, 0xCC, 0xAE, 0x5E, 0xCF, 0x61, 0x5A, 0xE2, 0x76, 0x4F, 0x75, 0xA0, 0x9C, 0x9C, 0x59, 0xB7, 0x64, 0x83, 0xC1, 0xFB, 0xC7, 0x35 },
+	{ 0x21, 0x3D, 0xD3, 0x4C, 0x7E, 0xFE, 0x4F, 0xB2, 0x7A, 0x6B, 0x35, 0xF6, 0xB4, 0x00, 0x0D, 0x1F, 0xE0, 0x32, 0x81, 0xAF, 0x3C, 0x72, 0x3E, 0x5C, 0x9F, 0x94, 0x74, 0x7A, 0x5F, 0x31, 0xCD, 0x3B },
+	{ 0xEC, 0x24, 0x6E, 0xEE, 0xB9, 0xCE, 0xD3, 0xF7, 0xAD, 0x33, 0xED, 0x28, 0x66, 0x0D, 0xD9, 0xBB, 0x07, 0x32, 0x51, 0x3D, 0xB4, 0xE2, 0xFA, 0x27, 0x8B, 0x60, 0xCD, 0xE3, 0x68, 0x2A, 0x4C, 0xCD },
+	{ 0xAC, 0x9B, 0x61, 0xD4, 0x46, 0x64, 0x8C, 0x30, 0x05, 0xD7, 0x89, 0x2B, 0xF3, 0xA8, 0x71, 0x9F, 0x4C, 0x81, 0x81, 0xCF, 0xDC, 0xBC, 0x2B, 0x79, 0xFE, 0xF1, 0x0A, 0x27, 0x9B, 0x91, 0x10, 0x95 },
+	{ 0x7B, 0xF8, 0xB2, 0x29, 0x59, 0xE3, 0x4E, 0x3A, 0x43, 0xF7, 0x07, 0x92, 0x23, 0xE8, 0x3A, 0x97, 0x54, 0x61, 0x7D, 0x39, 0x1E, 0x21, 0x3D, 0xFD, 0x80, 0x8E, 0x41, 0xB9, 0xBE, 0xAD, 0x4C, 0xE7 },
+	{ 0x68, 0xD4, 0xB5, 0xD4, 0xFA, 0x0E, 0x30, 0x2B, 0x64, 0xCC, 0xC5, 0xAF, 0x79, 0x29, 0x13, 0xAC, 0x4C, 0x88, 0xEC, 0x95, 0xC0, 0x7D, 0xDF, 0x40, 0x69, 0x42, 0x56, 0xEB, 0x88, 0xCE, 0x9F, 0x3D },
+	{ 0xB2, 0xC2, 0x42, 0x0F, 0x05, 0xF9, 0xAB, 0xE3, 0x63, 0x15, 0x91, 0x93, 0x36, 0xB3, 0x7E, 0x4E, 0x0F, 0xA3, 0x3F, 0xF7, 0xE7, 0x6A, 0x49, 0x27, 0x67, 0x00, 0x6F, 0xDB, 0x5D, 0x93, 0x54, 0x62 },
+	{ 0x13, 0x4F, 0x61, 0xBB, 0xD0, 0xBB, 0xB6, 0x9A, 0xED, 0x53, 0x43, 0x90, 0x45, 0x51, 0xA3, 0xE6, 0xC1, 0xAA, 0x7D, 0xCD, 0xD7, 0x7E, 0x90, 0x3E, 0x70, 0x23, 0xEB, 0x7C, 0x60, 0x32, 0x0A, 0xA7 },
+	{ 0x46, 0x93, 0xF9, 0xBF, 0xF7, 0xD4, 0xF3, 0x98, 0x6A, 0x7D, 0x17, 0x6E, 0x6E, 0x06, 0xF7, 0x2A, 0xD1, 0x49, 0x0D, 0x80, 0x5C, 0x99, 0xE2, 0x53, 0x47, 0xB8, 0xDE, 0x77, 0xB4, 0xDB, 0x6D, 0x9B },
+	{ 0x85, 0x3E, 0x26, 0xF7, 0x41, 0x95, 0x3B, 0x0F, 0xD5, 0xBD, 0xB4, 0x24, 0xE8, 0xAB, 0x9E, 0x8B, 0x37, 0x50, 0xEA, 0xA8, 0xEF, 0x61, 0xE4, 0x79, 0x02, 0xC9, 0x1E, 0x55, 0x4E, 0x9C, 0x73, 0xB9 },
+	{ 0xF7, 0xDE, 0x53, 0x63, 0x61, 0xAB, 0xAA, 0x0E, 0x15, 0x81, 0x56, 0xCF, 0x0E, 0xA4, 0xF6, 0x3A, 0x99, 0xB5, 0xE4, 0x05, 0x4F, 0x8F, 0xA4, 0xC9, 0xD4, 0x5F, 0x62, 0x85, 0xCA, 0xD5, 0x56, 0x94 },
+	{ 0x4C, 0x23, 0x06, 0x08, 0x86, 0x0A, 0x99, 0xAE, 0x8D, 0x7B, 0xD5, 0xC2, 0xCC, 0x17, 0xFA, 0x52, 0x09, 0x6B, 0x9A, 0x61, 0xBE, 0xDB, 0x17, 0xCB, 0x76, 0x17, 0x86, 0x4A, 0xD2, 0x9C, 0xA7, 0xA6 },
+	{ 0xAE, 0xB9, 0x20, 0xEA, 0x87, 0x95, 0x2D, 0xAD, 0xB1, 0xFB, 0x75, 0x92, 0x91, 0xE3, 0x38, 0x81, 0x39, 0xA8, 0x72, 0x86, 0x50, 0x01, 0x88, 0x6E, 0xD8, 0x47, 0x52, 0xE9, 0x3C, 0x25, 0x0C, 0x2A },
+	{ 0xAB, 0xA4, 0xAD, 0x9B, 0x48, 0x0B, 0x9D, 0xF3, 0xD0, 0x8C, 0xA5, 0xE8, 0x7B, 0x0C, 0x24, 0x40, 0xD4, 0xE4, 0xEA, 0x21, 0x22, 0x4C, 0x2E, 0xB4, 0x2C, 0xBA, 0xE4, 0x69, 0xD0, 0x89, 0xB9, 0x31 },
+	{ 0x05, 0x82, 0x56, 0x07, 0xD7, 0xFD, 0xF2, 0xD8, 0x2E, 0xF4, 0xC3, 0xC8, 0xC2, 0xAE, 0xA9, 0x61, 0xAD, 0x98, 0xD6, 0x0E, 0xDF, 0xF7, 0xD0, 0x18, 0x98, 0x3E, 0x21, 0x20, 0x4C, 0x0D, 0x93, 0xD1 },
+	{ 0xA7, 0x42, 0xF8, 0xB6, 0xAF, 0x82, 0xD8, 0xA6, 0xCA, 0x23, 0x57, 0xC5, 0xF1, 0xCF, 0x91, 0xDE, 0xFB, 0xD0, 0x66, 0x26, 0x7D, 0x75, 0xC0, 0x48, 0xB3, 0x52, 0x36, 0x65, 0x85, 0x02, 0x59, 0x62 },
+	{ 0x2B, 0xCA, 0xC8, 0x95, 0x99, 0x00, 0x0B, 0x42, 0xC9, 0x5A, 0xE2, 0x38, 0x35, 0xA7, 0x13, 0x70, 0x4E, 0xD7, 0x97, 0x89, 0xC8, 0x4F, 0xEF, 0x14, 0x9A, 0x87, 0x4F, 0xF7, 0x33, 0xF0, 0x17, 0xA2 },
+	{ 0xAC, 0x1E, 0xD0, 0x7D, 0x04, 0x8F, 0x10, 0x5A, 0x9E, 0x5B, 0x7A, 0xB8, 0x5B, 0x09, 0xA4, 0x92, 0xD5, 0xBA, 0xFF, 0x14, 0xB8, 0xBF, 0xB0, 0xE9, 0xFD, 0x78, 0x94, 0x86, 0xEE, 0xA2, 0xB9, 0x74 },
+	{ 0xE4, 0x8D, 0x0E, 0xCF, 0xAF, 0x49, 0x7D, 0x5B, 0x27, 0xC2, 0x5D, 0x99, 0xE1, 0x56, 0xCB, 0x05, 0x79, 0xD4, 0x40, 0xD6, 0xE3, 0x1F, 0xB6, 0x24, 0x73, 0x69, 0x6D, 0xBF, 0x95, 0xE0, 0x10, 0xE4 },
+	{ 0x12, 0xA9, 0x1F, 0xAD, 0xF8, 0xB2, 0x16, 0x44, 0xFD, 0x0F, 0x93, 0x4F, 0x3C, 0x4A, 0x8F, 0x62, 0xBA, 0x86, 0x2F, 0xFD, 0x20, 0xE8, 0xE9, 0x61, 0x15, 0x4C, 0x15, 0xC1, 0x38, 0x84, 0xED, 0x3D },
+	{ 0x7C, 0xBE, 0xE9, 0x6E, 0x13, 0x98, 0x97, 0xDC, 0x98, 0xFB, 0xEF, 0x3B, 0xE8, 0x1A, 0xD4, 0xD9, 0x64, 0xD2, 0x35, 0xCB, 0x12, 0x14, 0x1F, 0xB6, 0x67, 0x27, 0xE6, 0xE5, 0xDF, 0x73, 0xA8, 0x78 },
+	{ 0xEB, 0xF6, 0x6A, 0xBB, 0x59, 0x7A, 0xE5, 0x72, 0xA7, 0x29, 0x7C, 0xB0, 0x87, 0x1E, 0x35, 0x5A, 0xCC, 0xAF, 0xAD, 0x83, 0x77, 0xB8, 0xE7, 0x8B, 0xF1, 0x64, 0xCE, 0x2A, 0x18, 0xDE, 0x4B, 0xAF },
+	{ 0x71, 0xB9, 0x33, 0xB0, 0x7E, 0x4F, 0xF7, 0x81, 0x8C, 0xE0, 0x59, 0xD0, 0x08, 0x82, 0x9E, 0x45, 0x3C, 0x6F, 0xF0, 0x2E, 0xC0, 0xA7, 0xDB, 0x39, 0x3F, 0xC2, 0xD8, 0x70, 0xF3, 0x7A, 0x72, 0x86 },
+	{ 0x7C, 0xF7, 0xC5, 0x13, 0x31, 0x22, 0x0B, 0x8D, 0x3E, 0xBA, 0xED, 0x9C, 0x29, 0x39, 0x8A, 0x16, 0xD9, 0x81, 0x56, 0xE2, 0x61, 0x3C, 0xB0, 0x88, 0xF2, 0xB0, 0xE0, 0x8A, 0x1B, 0xE4, 0xCF, 0x4F },
+	{ 0x3E, 0x41, 0xA1, 0x08, 0xE0, 0xF6, 0x4A, 0xD2, 0x76, 0xB9, 0x79, 0xE1, 0xCE, 0x06, 0x82, 0x79, 0xE1, 0x6F, 0x7B, 0xC7, 0xE4, 0xAA, 0x1D, 0x21, 0x1E, 0x17, 0xB8, 0x11, 0x61, 0xDF, 0x16, 0x02 },
+	{ 0x88, 0x65, 0x02, 0xA8, 0x2A, 0xB4, 0x7B, 0xA8, 0xD8, 0x67, 0x10, 0xAA, 0x9D, 0xE3, 0xD4, 0x6E, 0xA6, 0x5C, 0x47, 0xAF, 0x6E, 0xE8, 0xDE, 0x45, 0x0C, 0xCE, 0xB8, 0xB1, 0x1B, 0x04, 0x5F, 0x50 },
+	{ 0xC0, 0x21, 0xBC, 0x5F, 0x09, 0x54, 0xFE, 0xE9, 0x4F, 0x46, 0xEA, 0x09, 0x48, 0x7E, 0x10, 0xA8, 0x48, 0x40, 0xD0, 0x2F, 0x64, 0x81, 0x0B, 0xC0, 0x8D, 0x9E, 0x55, 0x1F, 0x7D, 0x41, 0x68, 0x14 },
+	{ 0x20, 0x30, 0x51, 0x6E, 0x8A, 0x5F, 0xE1, 0x9A, 0xE7, 0x9C, 0x33, 0x6F, 0xCE, 0x26, 0x38, 0x2A, 0x74, 0x9D, 0x3F, 0xD0, 0xEC, 0x91, 0xE5, 0x37, 0xD4, 0xBD, 0x23, 0x58, 0xC1, 0x2D, 0xFB, 0x22 },
+	{ 0x55, 0x66, 0x98, 0xDA, 0xC8, 0x31, 0x7F, 0xD3, 0x6D, 0xFB, 0xDF, 0x25, 0xA7, 0x9C, 0xB1, 0x12, 0xD5, 0x42, 0x58, 0x60, 0x60, 0x5C, 0xBA, 0xF5, 0x07, 0xF2, 0x3B, 0xF7, 0xE9, 0xF4, 0x2A, 0xFE },
+	{ 0x2F, 0x86, 0x7B, 0xA6, 0x77, 0x73, 0xFD, 0xC3, 0xE9, 0x2F, 0xCE, 0xD9, 0x9A, 0x64, 0x09, 0xAD, 0x39, 0xD0, 0xB8, 0x80, 0xFD, 0xE8, 0xF1, 0x09, 0xA8, 0x17, 0x30, 0xC4, 0x45, 0x1D, 0x01, 0x78 },
+	{ 0x17, 0x2E, 0xC2, 0x18, 0xF1, 0x19, 0xDF, 0xAE, 0x98, 0x89, 0x6D, 0xFF, 0x29, 0xDD, 0x98, 0x76, 0xC9, 0x4A, 0xF8, 0x74, 0x17, 0xF9, 0xAE, 0x4C, 0x70, 0x14, 0xBB, 0x4E, 0x4B, 0x96, 0xAF, 0xC7 },
+	{ 0x3F, 0x85, 0x81, 0x4A, 0x18, 0x19, 0x5F, 0x87, 0x9A, 0xA9, 0x62, 0xF9, 0x5D, 0x26, 0xBD, 0x82, 0xA2, 0x78, 0xF2, 0xB8, 0x23, 0x20, 0x21, 0x8F, 0x6B, 0x3B, 0xD6, 0xF7, 0xF6, 0x67, 0xA6, 0xD9 },
+	{ 0x1B, 0x61, 0x8F, 0xBA, 0xA5, 0x66, 0xB3, 0xD4, 0x98, 0xC1, 0x2E, 0x98, 0x2C, 0x9E, 0xC5, 0x2E, 0x4D, 0xA8, 0x5A, 0x8C, 0x54, 0xF3, 0x8F, 0x34, 0xC0, 0x90, 0x39, 0x4F, 0x23, 0xC1, 0x84, 0xC1 },
+	{ 0x0C, 0x75, 0x8F, 0xB5, 0x69, 0x2F, 0xFD, 0x41, 0xA3, 0x57, 0x5D, 0x0A, 0xF0, 0x0C, 0xC7, 0xFB, 0xF2, 0xCB, 0xE5, 0x90, 0x5A, 0x58, 0x32, 0x3A, 0x88, 0xAE, 0x42, 0x44, 0xF6, 0xE4, 0xC9, 0x93 },
+	{ 0xA9, 0x31, 0x36, 0x0C, 0xAD, 0x62, 0x8C, 0x7F, 0x12, 0xA6, 0xC1, 0xC4, 0xB7, 0x53, 0xB0, 0xF4, 0x06, 0x2A, 0xEF, 0x3C, 0xE6, 0x5A, 0x1A, 0xE3, 0xF1, 0x93, 0x69, 0xDA, 0xDF, 0x3A, 0xE2, 0x3D },
+	{ 0xCB, 0xAC, 0x7D, 0x77, 0x3B, 0x1E, 0x3B, 0x3C, 0x66, 0x91, 0xD7, 0xAB, 0xB7, 0xE9, 0xDF, 0x04, 0x5C, 0x8B, 0xA1, 0x92, 0x68, 0xDE, 0xD1, 0x53, 0x20, 0x7F, 0x5E, 0x80, 0x43, 0x52, 0xEC, 0x5D },
+	{ 0x23, 0xA1, 0x96, 0xD3, 0x80, 0x2E, 0xD3, 0xC1, 0xB3, 0x84, 0x01, 0x9A, 0x82, 0x32, 0x58, 0x40, 0xD3, 0x2F, 0x71, 0x95, 0x0C, 0x45, 0x80, 0xB0, 0x34, 0x45, 0xE0, 0x89, 0x8E, 0x14, 0x05, 0x3C },
+	{ 0xF4, 0x49, 0x54, 0x70, 0xF2, 0x26, 0xC8, 0xC2, 0x14, 0xBE, 0x08, 0xFD, 0xFA, 0xD4, 0xBC, 0x4A, 0x2A, 0x9D, 0xBE, 0xA9, 0x13, 0x6A, 0x21, 0x0D, 0xF0, 0xD4, 0xB6, 0x49, 0x29, 0xE6, 0xFC, 0x14 },
+	{ 0xE2, 0x90, 0xDD, 0x27, 0x0B, 0x46, 0x7F, 0x34, 0xAB, 0x1C, 0x00, 0x2D, 0x34, 0x0F, 0xA0, 0x16, 0x25, 0x7F, 0xF1, 0x9E, 0x58, 0x33, 0xFD, 0xBB, 0xF2, 0xCB, 0x40, 0x1C, 0x3B, 0x28, 0x17, 0xDE },
+	{ 0x9F, 0xC7, 0xB5, 0xDE, 0xD3, 0xC1, 0x50, 0x42, 0xB2, 0xA6, 0x58, 0x2D, 0xC3, 0x9B, 0xE0, 0x16, 0xD2, 0x4A, 0x68, 0x2D, 0x5E, 0x61, 0xAD, 0x1E, 0xFF, 0x9C, 0x63, 0x30, 0x98, 0x48, 0xF7, 0x06 },
+	{ 0x8C, 0xCA, 0x67, 0xA3, 0x6D, 0x17, 0xD5, 0xE6, 0x34, 0x1C, 0xB5, 0x92, 0xFD, 0x7B, 0xEF, 0x99, 0x26, 0xC9, 0xE3, 0xAA, 0x10, 0x27, 0xEA, 0x11, 0xA7, 0xD8, 0xBD, 0x26, 0x0B, 0x57, 0x6E, 0x04 },
+	{ 0x40, 0x93, 0x92, 0xF5, 0x60, 0xF8, 0x68, 0x31, 0xDA, 0x43, 0x73, 0xEE, 0x5E, 0x00, 0x74, 0x26, 0x05, 0x95, 0xD7, 0xBC, 0x24, 0x18, 0x3B, 0x60, 0xED, 0x70, 0x0D, 0x45, 0x83, 0xD3, 0xF6, 0xF0 },
+	{ 0x28, 0x02, 0x16, 0x5D, 0xE0, 0x90, 0x91, 0x55, 0x46, 0xF3, 0x39, 0x8C, 0xD8, 0x49, 0x16, 0x4A, 0x19, 0xF9, 0x2A, 0xDB, 0xC3, 0x61, 0xAD, 0xC9, 0x9B, 0x0F, 0x20, 0xC8, 0xEA, 0x07, 0x10, 0x54 },
+	{ 0xAD, 0x83, 0x91, 0x68, 0xD9, 0xF8, 0xA4, 0xBE, 0x95, 0xBA, 0x9E, 0xF9, 0xA6, 0x92, 0xF0, 0x72, 0x56, 0xAE, 0x43, 0xFE, 0x6F, 0x98, 0x64, 0xE2, 0x90, 0x69, 0x1B, 0x02, 0x56, 0xCE, 0x50, 0xA9 },
+	{ 0x75, 0xFD, 0xAA, 0x50, 0x38, 0xC2, 0x84, 0xB8, 0x6D, 0x6E, 0x8A, 0xFF, 0xE8, 0xB2, 0x80, 0x7E, 0x46, 0x7B, 0x86, 0x60, 0x0E, 0x79, 0xAF, 0x36, 0x89, 0xFB, 0xC0, 0x63, 0x28, 0xCB, 0xF8, 0x94 },
+	{ 0xE5, 0x7C, 0xB7, 0x94, 0x87, 0xDD, 0x57, 0x90, 0x24, 0x32, 0xB2, 0x50, 0x73, 0x38, 0x13, 0xBD, 0x96, 0xA8, 0x4E, 0xFC, 0xE5, 0x9F, 0x65, 0x0F, 0xAC, 0x26, 0xE6, 0x69, 0x6A, 0xEF, 0xAF, 0xC3 },
+	{ 0x56, 0xF3, 0x4E, 0x8B, 0x96, 0x55, 0x7E, 0x90, 0xC1, 0xF2, 0x4B, 0x52, 0xD0, 0xC8, 0x9D, 0x51, 0x08, 0x6A, 0xCF, 0x1B, 0x00, 0xF6, 0x34, 0xCF, 0x1D, 0xDE, 0x92, 0x33, 0xB8, 0xEA, 0xAA, 0x3E },
+	{ 0x1B, 0x53, 0xEE, 0x94, 0xAA, 0xF3, 0x4E, 0x4B, 0x15, 0x9D, 0x48, 0xDE, 0x35, 0x2C, 0x7F, 0x06, 0x61, 0xD0, 0xA4, 0x0E, 0xDF, 0xF9, 0x5A, 0x0B, 0x16, 0x39, 0xB4, 0x09, 0x0E, 0x97, 0x44, 0x72 },
+	{ 0x05, 0x70, 0x5E, 0x2A, 0x81, 0x75, 0x7C, 0x14, 0xBD, 0x38, 0x3E, 0xA9, 0x8D, 0xDA, 0x54, 0x4E, 0xB1, 0x0E, 0x6B, 0xC0, 0x7B, 0xAE, 0x43, 0x5E, 0x25, 0x18, 0xDB, 0xE1, 0x33, 0x52, 0x53, 0x75 },
+	{ 0xD8, 0xB2, 0x86, 0x6E, 0x8A, 0x30, 0x9D, 0xB5, 0x3E, 0x52, 0x9E, 0xC3, 0x29, 0x11, 0xD8, 0x2F, 0x5C, 0xA1, 0x6C, 0xFF, 0x76, 0x21, 0x68, 0x91, 0xA9, 0x67, 0x6A, 0xA3, 0x1A, 0xAA, 0x6C, 0x42 },
+	{ 0xF5, 0x04, 0x1C, 0x24, 0x12, 0x70, 0xEB, 0x04, 0xC7, 0x1E, 0xC2, 0xC9, 0x5D, 0x4C, 0x38, 0xD8, 0x03, 0xB1, 0x23, 0x7B, 0x0F, 0x29, 0xFD, 0x4D, 0xB3, 0xEB, 0x39, 0x76, 0x69, 0xE8, 0x86, 0x99 },
+	{ 0x9A, 0x4C, 0xE0, 0x77, 0xC3, 0x49, 0x32, 0x2F, 0x59, 0x5E, 0x0E, 0xE7, 0x9E, 0xD0, 0xDA, 0x5F, 0xAB, 0x66, 0x75, 0x2C, 0xBF, 0xEF, 0x8F, 0x87, 0xD0, 0xE9, 0xD0, 0x72, 0x3C, 0x75, 0x30, 0xDD },
+	{ 0x65, 0x7B, 0x09, 0xF3, 0xD0, 0xF5, 0x2B, 0x5B, 0x8F, 0x2F, 0x97, 0x16, 0x3A, 0x0E, 0xDF, 0x0C, 0x04, 0xF0, 0x75, 0x40, 0x8A, 0x07, 0xBB, 0xEB, 0x3A, 0x41, 0x01, 0xA8, 0x91, 0x99, 0x0D, 0x62 },
+	{ 0x1E, 0x3F, 0x7B, 0xD5, 0xA5, 0x8F, 0xA5, 0x33, 0x34, 0x4A, 0xA8, 0xED, 0x3A, 0xC1, 0x22, 0xBB, 0x9E, 0x70, 0xD4, 0xEF, 0x50, 0xD0, 0x04, 0x53, 0x08, 0x21, 0x94, 0x8F, 0x5F, 0xE6, 0x31, 0x5A },
+	{ 0x80, 0xDC, 0xCF, 0x3F, 0xD8, 0x3D, 0xFD, 0x0D, 0x35, 0xAA, 0x28, 0x58, 0x59, 0x22, 0xAB, 0x89, 0xD5, 0x31, 0x39, 0x97, 0x67, 0x3E, 0xAF, 0x90, 0x5C, 0xEA, 0x9C, 0x0B, 0x22, 0x5C, 0x7B, 0x5F },
+	{ 0x8A, 0x0D, 0x0F, 0xBF, 0x63, 0x77, 0xD8, 0x3B, 0xB0, 0x8B, 0x51, 0x4B, 0x4B, 0x1C, 0x43, 0xAC, 0xC9, 0x5D, 0x75, 0x17, 0x14, 0xF8, 0x92, 0x56, 0x45, 0xCB, 0x6B, 0xC8, 0x56, 0xCA, 0x15, 0x0A },
+	{ 0x9F, 0xA5, 0xB4, 0x87, 0x73, 0x8A, 0xD2, 0x84, 0x4C, 0xC6, 0x34, 0x8A, 0x90, 0x19, 0x18, 0xF6, 0x59, 0xA3, 0xB8, 0x9E, 0x9C, 0x0D, 0xFE, 0xEA, 0xD3, 0x0D, 0xD9, 0x4B, 0xCF, 0x42, 0xEF, 0x8E },
+	{ 0x80, 0x83, 0x2C, 0x4A, 0x16, 0x77, 0xF5, 0xEA, 0x25, 0x60, 0xF6, 0x68, 0xE9, 0x35, 0x4D, 0xD3, 0x69, 0x97, 0xF0, 0x37, 0x28, 0xCF, 0xA5, 0x5E, 0x1B, 0x38, 0x33, 0x7C, 0x0C, 0x9E, 0xF8, 0x18 },
+	{ 0xAB, 0x37, 0xDD, 0xB6, 0x83, 0x13, 0x7E, 0x74, 0x08, 0x0D, 0x02, 0x6B, 0x59, 0x0B, 0x96, 0xAE, 0x9B, 0xB4, 0x47, 0x72, 0x2F, 0x30, 0x5A, 0x5A, 0xC5, 0x70, 0xEC, 0x1D, 0xF9, 0xB1, 0x74, 0x3C },
+	{ 0x3E, 0xE7, 0x35, 0xA6, 0x94, 0xC2, 0x55, 0x9B, 0x69, 0x3A, 0xA6, 0x86, 0x29, 0x36, 0x1E, 0x15, 0xD1, 0x22, 0x65, 0xAD, 0x6A, 0x3D, 0xED, 0xF4, 0x88, 0xB0, 0xB0, 0x0F, 0xAC, 0x97, 0x54, 0xBA },
+	{ 0xD6, 0xFC, 0xD2, 0x32, 0x19, 0xB6, 0x47, 0xE4, 0xCB, 0xD5, 0xEB, 0x2D, 0x0A, 0xD0, 0x1E, 0xC8, 0x83, 0x8A, 0x4B, 0x29, 0x01, 0xFC, 0x32, 0x5C, 0xC3, 0x70, 0x19, 0x81, 0xCA, 0x6C, 0x88, 0x8B },
+	{ 0x05, 0x20, 0xEC, 0x2F, 0x5B, 0xF7, 0xA7, 0x55, 0xDA, 0xCB, 0x50, 0xC6, 0xBF, 0x23, 0x3E, 0x35, 0x15, 0x43, 0x47, 0x63, 0xDB, 0x01, 0x39, 0xCC, 0xD9, 0xFA, 0xEF, 0xBB, 0x82, 0x07, 0x61, 0x2D },
+	{ 0xAF, 0xF3, 0xB7, 0x5F, 0x3F, 0x58, 0x12, 0x64, 0xD7, 0x66, 0x16, 0x62, 0xB9, 0x2F, 0x5A, 0xD3, 0x7C, 0x1D, 0x32, 0xBD, 0x45, 0xFF, 0x81, 0xA4, 0xED, 0x8A, 0xDC, 0x9E, 0xF3, 0x0D, 0xD9, 0x89 },
+	{ 0xD0, 0xDD, 0x65, 0x0B, 0xEF, 0xD3, 0xBA, 0x63, 0xDC, 0x25, 0x10, 0x2C, 0x62, 0x7C, 0x92, 0x1B, 0x9C, 0xBE, 0xB0, 0xB1, 0x30, 0x68, 0x69, 0x35, 0xB5, 0xC9, 0x27, 0xCB, 0x7C, 0xCD, 0x5E, 0x3B },
+	{ 0xE1, 0x14, 0x98, 0x16, 0xB1, 0x0A, 0x85, 0x14, 0xFB, 0x3E, 0x2C, 0xAB, 0x2C, 0x08, 0xBE, 0xE9, 0xF7, 0x3C, 0xE7, 0x62, 0x21, 0x70, 0x12, 0x46, 0xA5, 0x89, 0xBB, 0xB6, 0x73, 0x02, 0xD8, 0xA9 },
+	{ 0x7D, 0xA3, 0xF4, 0x41, 0xDE, 0x90, 0x54, 0x31, 0x7E, 0x72, 0xB5, 0xDB, 0xF9, 0x79, 0xDA, 0x01, 0xE6, 0xBC, 0xEE, 0xBB, 0x84, 0x78, 0xEA, 0xE6, 0xA2, 0x28, 0x49, 0xD9, 0x02, 0x92, 0x63, 0x5C },
+	{ 0x12, 0x30, 0xB1, 0xFC, 0x8A, 0x7D, 0x92, 0x15, 0xED, 0xC2, 0xD4, 0xA2, 0xDE, 0xCB, 0xDD, 0x0A, 0x6E, 0x21, 0x6C, 0x92, 0x42, 0x78, 0xC9, 0x1F, 0xC5, 0xD1, 0x0E, 0x7D, 0x60, 0x19, 0x2D, 0x94 },
+	{ 0x57, 0x50, 0xD7, 0x16, 0xB4, 0x80, 0x8F, 0x75, 0x1F, 0xEB, 0xC3, 0x88, 0x06, 0xBA, 0x17, 0x0B, 0xF6, 0xD5, 0x19, 0x9A, 0x78, 0x16, 0xBE, 0x51, 0x4E, 0x3F, 0x93, 0x2F, 0xBE, 0x0C, 0xB8, 0x71 },
+	{ 0x6F, 0xC5, 0x9B, 0x2F, 0x10, 0xFE, 0xBA, 0x95, 0x4A, 0xA6, 0x82, 0x0B, 0x3C, 0xA9, 0x87, 0xEE, 0x81, 0xD5, 0xCC, 0x1D, 0xA3, 0xC6, 0x3C, 0xE8, 0x27, 0x30, 0x1C, 0x56, 0x9D, 0xFB, 0x39, 0xCE },
+	{ 0xC7, 0xC3, 0xFE, 0x1E, 0xEB, 0xDC, 0x7B, 0x5A, 0x93, 0x93, 0x26, 0xE8, 0xDD, 0xB8, 0x3E, 0x8B, 0xF2, 0xB7, 0x80, 0xB6, 0x56, 0x78, 0xCB, 0x62, 0xF2, 0x08, 0xB0, 0x40, 0xAB, 0xDD, 0x35, 0xE2 },
+	{ 0x0C, 0x75, 0xC1, 0xA1, 0x5C, 0xF3, 0x4A, 0x31, 0x4E, 0xE4, 0x78, 0xF4, 0xA5, 0xCE, 0x0B, 0x8A, 0x6B, 0x36, 0x52, 0x8E, 0xF7, 0xA8, 0x20, 0x69, 0x6C, 0x3E, 0x42, 0x46, 0xC5, 0xA1, 0x58, 0x64 },
+	{ 0x21, 0x6D, 0xC1, 0x2A, 0x10, 0x85, 0x69, 0xA3, 0xC7, 0xCD, 0xDE, 0x4A, 0xED, 0x43, 0xA6, 0xC3, 0x30, 0x13, 0x9D, 0xDA, 0x3C, 0xCC, 0x4A, 0x10, 0x89, 0x05, 0xDB, 0x38, 0x61, 0x89, 0x90, 0x50 },
+	{ 0xA5, 0x7B, 0xE6, 0xAE, 0x67, 0x56, 0xF2, 0x8B, 0x02, 0xF5, 0x9D, 0xAD, 0xF7, 0xE0, 0xD7, 0xD8, 0x80, 0x7F, 0x10, 0xFA, 0x15, 0xCE, 0xD1, 0xAD, 0x35, 0x85, 0x52, 0x1A, 0x1D, 0x99, 0x5A, 0x89 },
+	{ 0x81, 0x6A, 0xEF, 0x87, 0x59, 0x53, 0x71, 0x6C, 0xD7, 0xA5, 0x81, 0xF7, 0x32, 0xF5, 0x3D, 0xD4, 0x35, 0xDA, 0xB6, 0x6D, 0x09, 0xC3, 0x61, 0xD2, 0xD6, 0x59, 0x2D, 0xE1, 0x77, 0x55, 0xD8, 0xA8 },
+	{ 0x9A, 0x76, 0x89, 0x32, 0x26, 0x69, 0x3B, 0x6E, 0xA9, 0x7E, 0x6A, 0x73, 0x8F, 0x9D, 0x10, 0xFB, 0x3D, 0x0B, 0x43, 0xAE, 0x0E, 0x8B, 0x7D, 0x81, 0x23, 0xEA, 0x76, 0xCE, 0x97, 0x98, 0x9C, 0x7E },
+	{ 0x8D, 0xAE, 0xDB, 0x9A, 0x27, 0x15, 0x29, 0xDB, 0xB7, 0xDC, 0x3B, 0x60, 0x7F, 0xE5, 0xEB, 0x2D, 0x32, 0x11, 0x77, 0x07, 0x58, 0xDD, 0x3B, 0x0A, 0x35, 0x93, 0xD2, 0xD7, 0x95, 0x4E, 0x2D, 0x5B },
+	{ 0x16, 0xDB, 0xC0, 0xAA, 0x5D, 0xD2, 0xC7, 0x74, 0xF5, 0x05, 0x10, 0x0F, 0x73, 0x37, 0x86, 0xD8, 0xA1, 0x75, 0xFC, 0xBB, 0xB5, 0x9C, 0x43, 0xE1, 0xFB, 0xFF, 0x3E, 0x1E, 0xAF, 0x31, 0xCB, 0x4A },
+	{ 0x86, 0x06, 0xCB, 0x89, 0x9C, 0x6A, 0xEA, 0xF5, 0x1B, 0x9D, 0xB0, 0xFE, 0x49, 0x24, 0xA9, 0xFD, 0x5D, 0xAB, 0xC1, 0x9F, 0x88, 0x26, 0xF2, 0xBC, 0x1C, 0x1D, 0x7D, 0xA1, 0x4D, 0x2C, 0x2C, 0x99 },
+	{ 0x84, 0x79, 0x73, 0x1A, 0xED, 0xA5, 0x7B, 0xD3, 0x7E, 0xAD, 0xB5, 0x1A, 0x50, 0x7E, 0x30, 0x7F, 0x3B, 0xD9, 0x5E, 0x69, 0xDB, 0xCA, 0x94, 0xF3, 0xBC, 0x21, 0x72, 0x60, 0x66, 0xAD, 0x6D, 0xFD },
+	{ 0x58, 0x47, 0x3A, 0x9E, 0xA8, 0x2E, 0xFA, 0x3F, 0x3B, 0x3D, 0x8F, 0xC8, 0x3E, 0xD8, 0x86, 0x31, 0x27, 0xB3, 0x3A, 0xE8, 0xDE, 0xAE, 0x63, 0x07, 0x20, 0x1E, 0xDB, 0x6D, 0xDE, 0x61, 0xDE, 0x29 },
+	{ 0x9A, 0x92, 0x55, 0xD5, 0x3A, 0xF1, 0x16, 0xDE, 0x8B, 0xA2, 0x7C, 0xE3, 0x5B, 0x4C, 0x7E, 0x15, 0x64, 0x06, 0x57, 0xA0, 0xFC, 0xB8, 0x88, 0xC7, 0x0D, 0x95, 0x43, 0x1D, 0xAC, 0xD8, 0xF8, 0x30 },
+	{ 0x9E, 0xB0, 0x5F, 0xFB, 0xA3, 0x9F, 0xD8, 0x59, 0x6A, 0x45, 0x49, 0x3E, 0x18, 0xD2, 0x51, 0x0B, 0xF3, 0xEF, 0x06, 0x5C, 0x51, 0xD6, 0xE1, 0x3A, 0xBE, 0x66, 0xAA, 0x57, 0xE0, 0x5C, 0xFD, 0xB7 },
+	{ 0x81, 0xDC, 0xC3, 0xA5, 0x05, 0xEA, 0xCE, 0x3F, 0x87, 0x9D, 0x8F, 0x70, 0x27, 0x76, 0x77, 0x0F, 0x9D, 0xF5, 0x0E, 0x52, 0x1D, 0x14, 0x28, 0xA8, 0x5D, 0xAF, 0x04, 0xF9, 0xAD, 0x21, 0x50, 0xE0 },
+	{ 0xE3, 0xE3, 0xC4, 0xAA, 0x3A, 0xCB, 0xBC, 0x85, 0x33, 0x2A, 0xF9, 0xD5, 0x64, 0xBC, 0x24, 0x16, 0x5E, 0x16, 0x87, 0xF6, 0xB1, 0xAD, 0xCB, 0xFA, 0xE7, 0x7A, 0x8F, 0x03, 0xC7, 0x2A, 0xC2, 0x8C },
+	{ 0x67, 0x46, 0xC8, 0x0B, 0x4E, 0xB5, 0x6A, 0xEA, 0x45, 0xE6, 0x4E, 0x72, 0x89, 0xBB, 0xA3, 0xED, 0xBF, 0x45, 0xEC, 0xF8, 0x20, 0x64, 0x81, 0xFF, 0x63, 0x02, 0x12, 0x29, 0x84, 0xCD, 0x52, 0x6A },
+	{ 0x2B, 0x62, 0x8E, 0x52, 0x76, 0x4D, 0x7D, 0x62, 0xC0, 0x86, 0x8B, 0x21, 0x23, 0x57, 0xCD, 0xD1, 0x2D, 0x91, 0x49, 0x82, 0x2F, 0x4E, 0x98, 0x45, 0xD9, 0x18, 0xA0, 0x8D, 0x1A, 0xE9, 0x90, 0xC0 },
+	{ 0xE4, 0xBF, 0xE8, 0x0D, 0x58, 0xC9, 0x19, 0x94, 0x61, 0x39, 0x09, 0xDC, 0x4B, 0x1A, 0x12, 0x49, 0x68, 0x96, 0xC0, 0x04, 0xAF, 0x7B, 0x57, 0x01, 0x48, 0x3D, 0xE4, 0x5D, 0x28, 0x23, 0xD7, 0x8E },
+	{ 0xEB, 0xB4, 0xBA, 0x15, 0x0C, 0xEF, 0x27, 0x34, 0x34, 0x5B, 0x5D, 0x64, 0x1B, 0xBE, 0xD0, 0x3A, 0x21, 0xEA, 0xFA, 0xE9, 0x33, 0xC9, 0x9E, 0x00, 0x92, 0x12, 0xEF, 0x04, 0x57, 0x4A, 0x85, 0x30 },
+	{ 0x39, 0x66, 0xEC, 0x73, 0xB1, 0x54, 0xAC, 0xC6, 0x97, 0xAC, 0x5C, 0xF5, 0xB2, 0x4B, 0x40, 0xBD, 0xB0, 0xDB, 0x9E, 0x39, 0x88, 0x36, 0xD7, 0x6D, 0x4B, 0x88, 0x0E, 0x3B, 0x2A, 0xF1, 0xAA, 0x27 },
+	{ 0xEF, 0x7E, 0x48, 0x31, 0xB3, 0xA8, 0x46, 0x36, 0x51, 0x8D, 0x6E, 0x4B, 0xFC, 0xE6, 0x4A, 0x43, 0xDB, 0x2A, 0x5D, 0xDA, 0x9C, 0xCA, 0x2B, 0x44, 0xF3, 0x90, 0x33, 0xBD, 0xC4, 0x0D, 0x62, 0x43 },
+	{ 0x7A, 0xBF, 0x6A, 0xCF, 0x5C, 0x8E, 0x54, 0x9D, 0xDB, 0xB1, 0x5A, 0xE8, 0xD8, 0xB3, 0x88, 0xC1, 0xC1, 0x97, 0xE6, 0x98, 0x73, 0x7C, 0x97, 0x85, 0x50, 0x1E, 0xD1, 0xF9, 0x49, 0x30, 0xB7, 0xD9 },
+	{ 0x88, 0x01, 0x8D, 0xED, 0x66, 0x81, 0x3F, 0x0C, 0xA9, 0x5D, 0xEF, 0x47, 0x4C, 0x63, 0x06, 0x92, 0x01, 0x99, 0x67, 0xB9, 0xE3, 0x68, 0x88, 0xDA, 0xDD, 0x94, 0x12, 0x47, 0x19, 0xB6, 0x82, 0xF6 },
+	{ 0x39, 0x30, 0x87, 0x6B, 0x9F, 0xC7, 0x52, 0x90, 0x36, 0xB0, 0x08, 0xB1, 0xB8, 0xBB, 0x99, 0x75, 0x22, 0xA4, 0x41, 0x63, 0x5A, 0x0C, 0x25, 0xEC, 0x02, 0xFB, 0x6D, 0x90, 0x26, 0xE5, 0x5A, 0x97 },
+	{ 0x0A, 0x40, 0x49, 0xD5, 0x7E, 0x83, 0x3B, 0x56, 0x95, 0xFA, 0xC9, 0x3D, 0xD1, 0xFB, 0xEF, 0x31, 0x66, 0xB4, 0x4B, 0x12, 0xAD, 0x11, 0x24, 0x86, 0x62, 0x38, 0x3A, 0xE0, 0x51, 0xE1, 0x58, 0x27 },
+	{ 0x81, 0xDC, 0xC0, 0x67, 0x8B, 0xB6, 0xA7, 0x65, 0xE4, 0x8C, 0x32, 0x09, 0x65, 0x4F, 0xE9, 0x00, 0x89, 0xCE, 0x44, 0xFF, 0x56, 0x18, 0x47, 0x7E, 0x39, 0xAB, 0x28, 0x64, 0x76, 0xDF, 0x05, 0x2B },
+	{ 0xE6, 0x9B, 0x3A, 0x36, 0xA4, 0x46, 0x19, 0x12, 0xDC, 0x08, 0x34, 0x6B, 0x11, 0xDD, 0xCB, 0x9D, 0xB7, 0x96, 0xF8, 0x85, 0xFD, 0x01, 0x93, 0x6E, 0x66, 0x2F, 0xE2, 0x92, 0x97, 0xB0, 0x99, 0xA4 },
+	{ 0x5A, 0xC6, 0x50, 0x3B, 0x0D, 0x8D, 0xA6, 0x91, 0x76, 0x46, 0xE6, 0xDC, 0xC8, 0x7E, 0xDC, 0x58, 0xE9, 0x42, 0x45, 0x32, 0x4C, 0xC2, 0x04, 0xF4, 0xDD, 0x4A, 0xF0, 0x15, 0x63, 0xAC, 0xD4, 0x27 },
+	{ 0xDF, 0x6D, 0xDA, 0x21, 0x35, 0x9A, 0x30, 0xBC, 0x27, 0x17, 0x80, 0x97, 0x1C, 0x1A, 0xBD, 0x56, 0xA6, 0xEF, 0x16, 0x7E, 0x48, 0x08, 0x87, 0x88, 0x8E, 0x73, 0xA8, 0x6D, 0x3B, 0xF6, 0x05, 0xE9 },
+	{ 0xE8, 0xE6, 0xE4, 0x70, 0x71, 0xE7, 0xB7, 0xDF, 0x25, 0x80, 0xF2, 0x25, 0xCF, 0xBB, 0xED, 0xF8, 0x4C, 0xE6, 0x77, 0x46, 0x62, 0x66, 0x28, 0xD3, 0x30, 0x97, 0xE4, 0xB7, 0xDC, 0x57, 0x11, 0x07 },
+	{ 0x53, 0xE4, 0x0E, 0xAD, 0x62, 0x05, 0x1E, 0x19, 0xCB, 0x9B, 0xA8, 0x13, 0x3E, 0x3E, 0x5C, 0x1C, 0xE0, 0x0D, 0xDC, 0xAD, 0x8A, 0xCF, 0x34, 0x2A, 0x22, 0x43, 0x60, 0xB0, 0xAC, 0xC1, 0x47, 0x77 },
+	{ 0x9C, 0xCD, 0x53, 0xFE, 0x80, 0xBE, 0x78, 0x6A, 0xA9, 0x84, 0x63, 0x84, 0x62, 0xFB, 0x28, 0xAF, 0xDF, 0x12, 0x2B, 0x34, 0xD7, 0x8F, 0x46, 0x87, 0xEC, 0x63, 0x2B, 0xB1, 0x9D, 0xE2, 0x37, 0x1A },
+	{ 0xCB, 0xD4, 0x80, 0x52, 0xC4, 0x8D, 0x78, 0x84, 0x66, 0xA3, 0xE8, 0x11, 0x8C, 0x56, 0xC9, 0x7F, 0xE1, 0x46, 0xE5, 0x54, 0x6F, 0xAA, 0xF9, 0x3E, 0x2B, 0xC3, 0xC4, 0x7E, 0x45, 0x93, 0x97, 0x53 },
+	{ 0x25, 0x68, 0x83, 0xB1, 0x4E, 0x2A, 0xF4, 0x4D, 0xAD, 0xB2, 0x8E, 0x1B, 0x34, 0xB2, 0xAC, 0x0F, 0x0F, 0x4C, 0x91, 0xC3, 0x4E, 0xC9, 0x16, 0x9E, 0x29, 0x03, 0x61, 0x58, 0xAC, 0xAA, 0x95, 0xB9 },
+	{ 0x44, 0x71, 0xB9, 0x1A, 0xB4, 0x2D, 0xB7, 0xC4, 0xDD, 0x84, 0x90, 0xAB, 0x95, 0xA2, 0xEE, 0x8D, 0x04, 0xE3, 0xEF, 0x5C, 0x3D, 0x6F, 0xC7, 0x1A, 0xC7, 0x4B, 0x2B, 0x26, 0x91, 0x4D, 0x16, 0x41 },
+	{ 0xA5, 0xEB, 0x08, 0x03, 0x8F, 0x8F, 0x11, 0x55, 0xED, 0x86, 0xE6, 0x31, 0x90, 0x6F, 0xC1, 0x30, 0x95, 0xF6, 0xBB, 0xA4, 0x1D, 0xE5, 0xD4, 0xE7, 0x95, 0x75, 0x8E, 0xC8, 0xC8, 0xDF, 0x8A, 0xF1 },
+	{ 0xDC, 0x1D, 0xB6, 0x4E, 0xD8, 0xB4, 0x8A, 0x91, 0x0E, 0x06, 0x0A, 0x6B, 0x86, 0x63, 0x74, 0xC5, 0x78, 0x78, 0x4E, 0x9A, 0xC4, 0x9A, 0xB2, 0x77, 0x40, 0x92, 0xAC, 0x71, 0x50, 0x19, 0x34, 0xAC },
+	{ 0x28, 0x54, 0x13, 0xB2, 0xF2, 0xEE, 0x87, 0x3D, 0x34, 0x31, 0x9E, 0xE0, 0xBB, 0xFB, 0xB9, 0x0F, 0x32, 0xDA, 0x43, 0x4C, 0xC8, 0x7E, 0x3D, 0xB5, 0xED, 0x12, 0x1B, 0xB3, 0x98, 0xED, 0x96, 0x4B },
+	{ 0x02, 0x16, 0xE0, 0xF8, 0x1F, 0x75, 0x0F, 0x26, 0xF1, 0x99, 0x8B, 0xC3, 0x93, 0x4E, 0x3E, 0x12, 0x4C, 0x99, 0x45, 0xE6, 0x85, 0xA6, 0x0B, 0x25, 0xE8, 0xFB, 0xD9, 0x62, 0x5A, 0xB6, 0xB5, 0x99 },
+	{ 0x38, 0xC4, 0x10, 0xF5, 0xB9, 0xD4, 0x07, 0x20, 0x50, 0x75, 0x5B, 0x31, 0xDC, 0xA8, 0x9F, 0xD5, 0x39, 0x5C, 0x67, 0x85, 0xEE, 0xB3, 0xD7, 0x90, 0xF3, 0x20, 0xFF, 0x94, 0x1C, 0x5A, 0x93, 0xBF },
+	{ 0xF1, 0x84, 0x17, 0xB3, 0x9D, 0x61, 0x7A, 0xB1, 0xC1, 0x8F, 0xDF, 0x91, 0xEB, 0xD0, 0xFC, 0x6D, 0x55, 0x16, 0xBB, 0x34, 0xCF, 0x39, 0x36, 0x40, 0x37, 0xBC, 0xE8, 0x1F, 0xA0, 0x4C, 0xEC, 0xB1 },
+	{ 0x1F, 0xA8, 0x77, 0xDE, 0x67, 0x25, 0x9D, 0x19, 0x86, 0x3A, 0x2A, 0x34, 0xBC, 0xC6, 0x96, 0x2A, 0x2B, 0x25, 0xFC, 0xBF, 0x5C, 0xBE, 0xCD, 0x7E, 0xDE, 0x8F, 0x1F, 0xA3, 0x66, 0x88, 0xA7, 0x96 },
+	{ 0x5B, 0xD1, 0x69, 0xE6, 0x7C, 0x82, 0xC2, 0xC2, 0xE9, 0x8E, 0xF7, 0x00, 0x8B, 0xDF, 0x26, 0x1F, 0x2D, 0xDF, 0x30, 0xB1, 0xC0, 0x0F, 0x9E, 0x7F, 0x27, 0x5B, 0xB3, 0xE8, 0xA2, 0x8D, 0xC9, 0xA2 },
+	{ 0xC8, 0x0A, 0xBE, 0xEB, 0xB6, 0x69, 0xAD, 0x5D, 0xEE, 0xB5, 0xF5, 0xEC, 0x8E, 0xA6, 0xB7, 0xA0, 0x5D, 0xDF, 0x7D, 0x31, 0xEC, 0x4C, 0x0A, 0x2E, 0xE2, 0x0B, 0x0B, 0x98, 0xCA, 0xEC, 0x67, 0x46 },
+	{ 0xE7, 0x6D, 0x3F, 0xBD, 0xA5, 0xBA, 0x37, 0x4E, 0x6B, 0xF8, 0xE5, 0x0F, 0xAD, 0xC3, 0xBB, 0xB9, 0xBA, 0x5C, 0x20, 0x6E, 0xBD, 0xEC, 0x89, 0xA3, 0xA5, 0x4C, 0xF3, 0xDD, 0x84, 0xA0, 0x70, 0x16 },
+	{ 0x7B, 0xBA, 0x9D, 0xC5, 0xB5, 0xDB, 0x20, 0x71, 0xD1, 0x77, 0x52, 0xB1, 0x04, 0x4C, 0x1E, 0xCE, 0xD9, 0x6A, 0xAF, 0x2D, 0xD4, 0x6E, 0x9B, 0x43, 0x37, 0x50, 0xE8, 0xEA, 0x0D, 0xCC, 0x18, 0x70 },
+	{ 0xF2, 0x9B, 0x1B, 0x1A, 0xB9, 0xBA, 0xB1, 0x63, 0x01, 0x8E, 0xE3, 0xDA, 0x15, 0x23, 0x2C, 0xCA, 0x78, 0xEC, 0x52, 0xDB, 0xC3, 0x4E, 0xDA, 0x5B, 0x82, 0x2E, 0xC1, 0xD8, 0x0F, 0xC2, 0x1B, 0xD0 },
+	{ 0x9E, 0xE3, 0xE3, 0xE7, 0xE9, 0x00, 0xF1, 0xE1, 0x1D, 0x30, 0x8C, 0x4B, 0x2B, 0x30, 0x76, 0xD2, 0x72, 0xCF, 0x70, 0x12, 0x4F, 0x9F, 0x51, 0xE1, 0xDA, 0x60, 0xF3, 0x78, 0x46, 0xCD, 0xD2, 0xF4 },
+	{ 0x70, 0xEA, 0x3B, 0x01, 0x76, 0x92, 0x7D, 0x90, 0x96, 0xA1, 0x85, 0x08, 0xCD, 0x12, 0x3A, 0x29, 0x03, 0x25, 0x92, 0x0A, 0x9D, 0x00, 0xA8, 0x9B, 0x5D, 0xE0, 0x42, 0x73, 0xFB, 0xC7, 0x6B, 0x85 },
+	{ 0x67, 0xDE, 0x25, 0xC0, 0x2A, 0x4A, 0xAB, 0xA2, 0x3B, 0xDC, 0x97, 0x3C, 0x8B, 0xB0, 0xB5, 0x79, 0x6D, 0x47, 0xCC, 0x06, 0x59, 0xD4, 0x3D, 0xFF, 0x1F, 0x97, 0xDE, 0x17, 0x49, 0x63, 0xB6, 0x8E },
+	{ 0xB2, 0x16, 0x8E, 0x4E, 0x0F, 0x18, 0xB0, 0xE6, 0x41, 0x00, 0xB5, 0x17, 0xED, 0x95, 0x25, 0x7D, 0x73, 0xF0, 0x62, 0x0D, 0xF8, 0x85, 0xC1, 0x3D, 0x2E, 0xCF, 0x79, 0x36, 0x7B, 0x38, 0x4C, 0xEE },
+	{ 0x2E, 0x7D, 0xEC, 0x24, 0x28, 0x85, 0x3B, 0x2C, 0x71, 0x76, 0x07, 0x45, 0x54, 0x1F, 0x7A, 0xFE, 0x98, 0x25, 0xB5, 0xDD, 0x77, 0xDF, 0x06, 0x51, 0x1D, 0x84, 0x41, 0xA9, 0x4B, 0xAC, 0xC9, 0x27 },
+	{ 0xCA, 0x9F, 0xFA, 0xC4, 0xC4, 0x3F, 0x0B, 0x48, 0x46, 0x1D, 0xC5, 0xC2, 0x63, 0xBE, 0xA3, 0xF6, 0xF0, 0x06, 0x11, 0xCE, 0xAC, 0xAB, 0xF6, 0xF8, 0x95, 0xBA, 0x2B, 0x01, 0x01, 0xDB, 0xB6, 0x8D },
+	{ 0x74, 0x10, 0xD4, 0x2D, 0x8F, 0xD1, 0xD5, 0xE9, 0xD2, 0xF5, 0x81, 0x5C, 0xB9, 0x34, 0x17, 0x99, 0x88, 0x28, 0xEF, 0x3C, 0x42, 0x30, 0xBF, 0xBD, 0x41, 0x2D, 0xF0, 0xA4, 0xA7, 0xA2, 0x50, 0x7A },
+	{ 0x50, 0x10, 0xF6, 0x84, 0x51, 0x6D, 0xCC, 0xD0, 0xB6, 0xEE, 0x08, 0x52, 0xC2, 0x51, 0x2B, 0x4D, 0xC0, 0x06, 0x6C, 0xF0, 0xD5, 0x6F, 0x35, 0x30, 0x29, 0x78, 0xDB, 0x8A, 0xE3, 0x2C, 0x6A, 0x81 },
+	{ 0xAC, 0xAA, 0xB5, 0x85, 0xF7, 0xB7, 0x9B, 0x71, 0x99, 0x35, 0xCE, 0xB8, 0x95, 0x23, 0xDD, 0xC5, 0x48, 0x27, 0xF7, 0x5C, 0x56, 0x88, 0x38, 0x56, 0x15, 0x4A, 0x56, 0xCD, 0xCD, 0x5E, 0xE9, 0x88 },
+	{ 0x66, 0x6D, 0xE5, 0xD1, 0x44, 0x0F, 0xEE, 0x73, 0x31, 0xAA, 0xF0, 0x12, 0x3A, 0x62, 0xEF, 0x2D, 0x8B, 0xA5, 0x74, 0x53, 0xA0, 0x76, 0x96, 0x35, 0xAC, 0x6C, 0xD0, 0x1E, 0x63, 0x3F, 0x77, 0x12 },
+	{ 0xA6, 0xF9, 0x86, 0x58, 0xF6, 0xEA, 0xBA, 0xF9, 0x02, 0xD8, 0xB3, 0x87, 0x1A, 0x4B, 0x10, 0x1D, 0x16, 0x19, 0x6E, 0x8A, 0x4B, 0x24, 0x1E, 0x15, 0x58, 0xFE, 0x29, 0x96, 0x6E, 0x10, 0x3E, 0x8D },
+	{ 0x89, 0x15, 0x46, 0xA8, 0xB2, 0x9F, 0x30, 0x47, 0xDD, 0xCF, 0xE5, 0xB0, 0x0E, 0x45, 0xFD, 0x55, 0x75, 0x63, 0x73, 0x10, 0x5E, 0xA8, 0x63, 0x7D, 0xFC, 0xFF, 0x54, 0x7B, 0x6E, 0xA9, 0x53, 0x5F },
+	{ 0x18, 0xDF, 0xBC, 0x1A, 0xC5, 0xD2, 0x5B, 0x07, 0x61, 0x13, 0x7D, 0xBD, 0x22, 0xC1, 0x7C, 0x82, 0x9D, 0x0F, 0x0E, 0xF1, 0xD8, 0x23, 0x44, 0xE9, 0xC8, 0x9C, 0x28, 0x66, 0x94, 0xDA, 0x24, 0xE8 },
+	{ 0xB5, 0x4B, 0x9B, 0x67, 0xF8, 0xFE, 0xD5, 0x4B, 0xBF, 0x5A, 0x26, 0x66, 0xDB, 0xDF, 0x4B, 0x23, 0xCF, 0xF1, 0xD1, 0xB6, 0xF4, 0xAF, 0xC9, 0x85, 0xB2, 0xE6, 0xD3, 0x30, 0x5A, 0x9F, 0xF8, 0x0F },
+	{ 0x7D, 0xB4, 0x42, 0xE1, 0x32, 0xBA, 0x59, 0xBC, 0x12, 0x89, 0xAA, 0x98, 0xB0, 0xD3, 0xE8, 0x06, 0x00, 0x4F, 0x8E, 0xC1, 0x28, 0x11, 0xAF, 0x1E, 0x2E, 0x33, 0xC6, 0x9B, 0xFD, 0xE7, 0x29, 0xE1 },
+	{ 0x25, 0x0F, 0x37, 0xCD, 0xC1, 0x5E, 0x81, 0x7D, 0x2F, 0x16, 0x0D, 0x99, 0x56, 0xC7, 0x1F, 0xE3, 0xEB, 0x5D, 0xB7, 0x45, 0x56, 0xE4, 0xAD, 0xF9, 0xA4, 0xFF, 0xAF, 0xBA, 0x74, 0x01, 0x03, 0x96 },
+	{ 0x4A, 0xB8, 0xA3, 0xDD, 0x1D, 0xDF, 0x8A, 0xD4, 0x3D, 0xAB, 0x13, 0xA2, 0x7F, 0x66, 0xA6, 0x54, 0x4F, 0x29, 0x05, 0x97, 0xFA, 0x96, 0x04, 0x0E, 0x0E, 0x1D, 0xB9, 0x26, 0x3A, 0xA4, 0x79, 0xF8 },
+	{ 0xEE, 0x61, 0x72, 0x7A, 0x07, 0x66, 0xDF, 0x93, 0x9C, 0xCD, 0xC8, 0x60, 0x33, 0x40, 0x44, 0xC7, 0x9A, 0x3C, 0x9B, 0x15, 0x62, 0x00, 0xBC, 0x3A, 0xA3, 0x29, 0x73, 0x48, 0x3D, 0x83, 0x41, 0xAE },
+	{ 0x3F, 0x68, 0xC7, 0xEC, 0x63, 0xAC, 0x11, 0xEB, 0xB9, 0x8F, 0x94, 0xB3, 0x39, 0xB0, 0x5C, 0x10, 0x49, 0x84, 0xFD, 0xA5, 0x01, 0x03, 0x06, 0x01, 0x44, 0xE5, 0xA2, 0xBF, 0xCC, 0xC9, 0xDA, 0x95 },
+	{ 0x05, 0x6F, 0x29, 0x81, 0x6B, 0x8A, 0xF8, 0xF5, 0x66, 0x82, 0xBC, 0x4D, 0x7C, 0xF0, 0x94, 0x11, 0x1D, 0xA7, 0x73, 0x3E, 0x72, 0x6C, 0xD1, 0x3D, 0x6B, 0x3E, 0x8E, 0xA0, 0x3E, 0x92, 0xA0, 0xD5 },
+	{ 0xF5, 0xEC, 0x43, 0xA2, 0x8A, 0xCB, 0xEF, 0xF1, 0xF3, 0x31, 0x8A, 0x5B, 0xCA, 0xC7, 0xC6, 0x6D, 0xDB, 0x52, 0x30, 0xB7, 0x9D, 0xB2, 0xD1, 0x05, 0xBC, 0xBE, 0x15, 0xF3, 0xC1, 0x14, 0x8D, 0x69 },
+	{ 0x2A, 0x69, 0x60, 0xAD, 0x1D, 0x8D, 0xD5, 0x47, 0x55, 0x5C, 0xFB, 0xD5, 0xE4, 0x60, 0x0F, 0x1E, 0xAA, 0x1C, 0x8E, 0xDA, 0x34, 0xDE, 0x03, 0x74, 0xEC, 0x4A, 0x26, 0xEA, 0xAA, 0xA3, 0x3B, 0x4E },
+	{ 0xDC, 0xC1, 0xEA, 0x7B, 0xAA, 0xB9, 0x33, 0x84, 0xF7, 0x6B, 0x79, 0x68, 0x66, 0x19, 0x97, 0x54, 0x74, 0x2F, 0x7B, 0x96, 0xD6, 0xB4, 0xC1, 0x20, 0x16, 0x5C, 0x04, 0xA6, 0xC4, 0xF5, 0xCE, 0x10 },
+	{ 0x13, 0xD5, 0xDF, 0x17, 0x92, 0x21, 0x37, 0x9C, 0x6A, 0x78, 0xC0, 0x7C, 0x79, 0x3F, 0xF5, 0x34, 0x87, 0xCA, 0xE6, 0xBF, 0x9F, 0xE8, 0x82, 0x54, 0x1A, 0xB0, 0xE7, 0x35, 0xE3, 0xEA, 0xDA, 0x3B },
+	{ 0x8C, 0x59, 0xE4, 0x40, 0x76, 0x41, 0xA0, 0x1E, 0x8F, 0xF9, 0x1F, 0x99, 0x80, 0xDC, 0x23, 0x6F, 0x4E, 0xCD, 0x6F, 0xCF, 0x52, 0x58, 0x9A, 0x09, 0x9A, 0x96, 0x16, 0x33, 0x96, 0x77, 0x14, 0xE1 },
+	{ 0x83, 0x3B, 0x1A, 0xC6, 0xA2, 0x51, 0xFD, 0x08, 0xFD, 0x6D, 0x90, 0x8F, 0xEA, 0x2A, 0x4E, 0xE1, 0xE0, 0x40, 0xBC, 0xA9, 0x3F, 0xC1, 0xA3, 0x8E, 0xC3, 0x82, 0x0E, 0x0C, 0x10, 0xBD, 0x82, 0xEA },
+	{ 0xA2, 0x44, 0xF9, 0x27, 0xF3, 0xB4, 0x0B, 0x8F, 0x6C, 0x39, 0x15, 0x70, 0xC7, 0x65, 0x41, 0x8F, 0x2F, 0x6E, 0x70, 0x8E, 0xAC, 0x90, 0x06, 0xC5, 0x1A, 0x7F, 0xEF, 0xF4, 0xAF, 0x3B, 0x2B, 0x9E },
+	{ 0x3D, 0x99, 0xED, 0x95, 0x50, 0xCF, 0x11, 0x96, 0xE6, 0xC4, 0xD2, 0x0C, 0x25, 0x96, 0x20, 0xF8, 0x58, 0xC3, 0xD7, 0x03, 0x37, 0x4C, 0x12, 0x8C, 0xE7, 0xB5, 0x90, 0x31, 0x0C, 0x83, 0x04, 0x6D },
+	{ 0x2B, 0x35, 0xC4, 0x7D, 0x7B, 0x87, 0x76, 0x1F, 0x0A, 0xE4, 0x3A, 0xC5, 0x6A, 0xC2, 0x7B, 0x9F, 0x25, 0x83, 0x03, 0x67, 0xB5, 0x95, 0xBE, 0x8C, 0x24, 0x0E, 0x94, 0x60, 0x0C, 0x6E, 0x33, 0x12 },
+	{ 0x5D, 0x11, 0xED, 0x37, 0xD2, 0x4D, 0xC7, 0x67, 0x30, 0x5C, 0xB7, 0xE1, 0x46, 0x7D, 0x87, 0xC0, 0x65, 0xAC, 0x4B, 0xC8, 0xA4, 0x26, 0xDE, 0x38, 0x99, 0x1F, 0xF5, 0x9A, 0xA8, 0x73, 0x5D, 0x02 },
+	{ 0xB8, 0x36, 0x47, 0x8E, 0x1C, 0xA0, 0x64, 0x0D, 0xCE, 0x6F, 0xD9, 0x10, 0xA5, 0x09, 0x62, 0x72, 0xC8, 0x33, 0x09, 0x90, 0xCD, 0x97, 0x86, 0x4A, 0xC2, 0xBF, 0x14, 0xEF, 0x6B, 0x23, 0x91, 0x4A },
+	{ 0x91, 0x00, 0xF9, 0x46, 0xD6, 0xCC, 0xDE, 0x3A, 0x59, 0x7F, 0x90, 0xD3, 0x9F, 0xC1, 0x21, 0x5B, 0xAD, 0xDC, 0x74, 0x13, 0x64, 0x3D, 0x85, 0xC2, 0x1C, 0x3E, 0xEE, 0x5D, 0x2D, 0xD3, 0x28, 0x94 },
+	{ 0xDA, 0x70, 0xEE, 0xDD, 0x23, 0xE6, 0x63, 0xAA, 0x1A, 0x74, 0xB9, 0x76, 0x69, 0x35, 0xB4, 0x79, 0x22, 0x2A, 0x72, 0xAF, 0xBA, 0x5C, 0x79, 0x51, 0x58, 0xDA, 0xD4, 0x1A, 0x3B, 0xD7, 0x7E, 0x40 },
+	{ 0xF0, 0x67, 0xED, 0x6A, 0x0D, 0xBD, 0x43, 0xAA, 0x0A, 0x92, 0x54, 0xE6, 0x9F, 0xD6, 0x6B, 0xDD, 0x8A, 0xCB, 0x87, 0xDE, 0x93, 0x6C, 0x25, 0x8C, 0xFB, 0x02, 0x28, 0x5F, 0x2C, 0x11, 0xFA, 0x79 },
+	{ 0x71, 0x5C, 0x99, 0xC7, 0xD5, 0x75, 0x80, 0xCF, 0x97, 0x53, 0xB4, 0xC1, 0xD7, 0x95, 0xE4, 0x5A, 0x83, 0xFB, 0xB2, 0x28, 0xC0, 0xD3, 0x6F, 0xBE, 0x20, 0xFA, 0xF3, 0x9B, 0xDD, 0x6D, 0x4E, 0x85 },
+	{ 0xE4, 0x57, 0xD6, 0xAD, 0x1E, 0x67, 0xCB, 0x9B, 0xBD, 0x17, 0xCB, 0xD6, 0x98, 0xFA, 0x6D, 0x7D, 0xAE, 0x0C, 0x9B, 0x7A, 0xD6, 0xCB, 0xD6, 0x53, 0x96, 0x34, 0xE3, 0x2A, 0x71, 0x9C, 0x84, 0x92 },
+	{ 0xEC, 0xE3, 0xEA, 0x81, 0x03, 0xE0, 0x24, 0x83, 0xC6, 0x4A, 0x70, 0xA4, 0xBD, 0xCE, 0xE8, 0xCE, 0xB6, 0x27, 0x8F, 0x25, 0x33, 0xF3, 0xF4, 0x8D, 0xBE, 0xED, 0xFB, 0xA9, 0x45, 0x31, 0xD4, 0xAE },
+	{ 0x38, 0x8A, 0xA5, 0xD3, 0x66, 0x7A, 0x97, 0xC6, 0x8D, 0x3D, 0x56, 0xF8, 0xF3, 0xEE, 0x8D, 0x3D, 0x36, 0x09, 0x1F, 0x17, 0xFE, 0x5D, 0x1B, 0x0D, 0x5D, 0x84, 0xC9, 0x3B, 0x2F, 0xFE, 0x40, 0xBD },
+	{ 0x8B, 0x6B, 0x31, 0xB9, 0xAD, 0x7C, 0x3D, 0x5C, 0xD8, 0x4B, 0xF9, 0x89, 0x47, 0xB9, 0xCD, 0xB5, 0x9D, 0xF8, 0xA2, 0x5F, 0xF7, 0x38, 0x10, 0x10, 0x13, 0xBE, 0x4F, 0xD6, 0x5E, 0x1D, 0xD1, 0xA3 },
+	{ 0x06, 0x62, 0x91, 0xF6, 0xBB, 0xD2, 0x5F, 0x3C, 0x85, 0x3D, 0xB7, 0xD8, 0xB9, 0x5C, 0x9A, 0x1C, 0xFB, 0x9B, 0xF1, 0xC1, 0xC9, 0x9F, 0xB9, 0x5A, 0x9B, 0x78, 0x69, 0xD9, 0x0F, 0x1C, 0x29, 0x03 },
+	{ 0xA7, 0x07, 0xEF, 0xBC, 0xCD, 0xCE, 0xED, 0x42, 0x96, 0x7A, 0x66, 0xF5, 0x53, 0x9B, 0x93, 0xED, 0x75, 0x60, 0xD4, 0x67, 0x30, 0x40, 0x16, 0xC4, 0x78, 0x0D, 0x77, 0x55, 0xA5, 0x65, 0xD4, 0xC4 },
+	{ 0x38, 0xC5, 0x3D, 0xFB, 0x70, 0xBE, 0x7E, 0x79, 0x2B, 0x07, 0xA6, 0xA3, 0x5B, 0x8A, 0x6A, 0x0A, 0xBA, 0x02, 0xC5, 0xC5, 0xF3, 0x8B, 0xAF, 0x5C, 0x82, 0x3F, 0xDF, 0xD9, 0xE4, 0x2D, 0x65, 0x7E },
+	{ 0xF2, 0x91, 0x13, 0x86, 0x50, 0x1D, 0x9A, 0xB9, 0xD7, 0x20, 0xCF, 0x8A, 0xD1, 0x05, 0x03, 0xD5, 0x63, 0x4B, 0xF4, 0xB7, 0xD1, 0x2B, 0x56, 0xDF, 0xB7, 0x4F, 0xEC, 0xC6, 0xE4, 0x09, 0x3F, 0x68 },
+	{ 0xC6, 0xF2, 0xBD, 0xD5, 0x2B, 0x81, 0xE6, 0xE4, 0xF6, 0x59, 0x5A, 0xBD, 0x4D, 0x7F, 0xB3, 0x1F, 0x65, 0x11, 0x69, 0xD0, 0x0F, 0xF3, 0x26, 0x92, 0x6B, 0x34, 0x94, 0x7B, 0x28, 0xA8, 0x39, 0x59 },
+	{ 0x29, 0x3D, 0x94, 0xB1, 0x8C, 0x98, 0xBB, 0x32, 0x23, 0x36, 0x6B, 0x8C, 0xE7, 0x4C, 0x28, 0xFB, 0xDF, 0x28, 0xE1, 0xF8, 0x4A, 0x33, 0x50, 0xB0, 0xEB, 0x2D, 0x18, 0x04, 0xA5, 0x77, 0x57, 0x9B },
+	{ 0x2C, 0x2F, 0xA5, 0xC0, 0xB5, 0x15, 0x33, 0x16, 0x5B, 0xC3, 0x75, 0xC2, 0x2E, 0x27, 0x81, 0x76, 0x82, 0x70, 0xA3, 0x83, 0x98, 0x5D, 0x13, 0xBD, 0x6B, 0x67, 0xB6, 0xFD, 0x67, 0xF8, 0x89, 0xEB },
+	{ 0xCA, 0xA0, 0x9B, 0x82, 0xB7, 0x25, 0x62, 0xE4, 0x3F, 0x4B, 0x22, 0x75, 0xC0, 0x91, 0x91, 0x8E, 0x62, 0x4D, 0x91, 0x16, 0x61, 0xCC, 0x81, 0x1B, 0xB5, 0xFA, 0xEC, 0x51, 0xF6, 0x08, 0x8E, 0xF7 },
+	{ 0x24, 0x76, 0x1E, 0x45, 0xE6, 0x74, 0x39, 0x53, 0x79, 0xFB, 0x17, 0x72, 0x9C, 0x78, 0xCB, 0x93, 0x9E, 0x6F, 0x74, 0xC5, 0xDF, 0xFB, 0x9C, 0x96, 0x1F, 0x49, 0x59, 0x82, 0xC3, 0xED, 0x1F, 0xE3 },
+	{ 0x55, 0xB7, 0x0A, 0x82, 0x13, 0x1E, 0xC9, 0x48, 0x88, 0xD7, 0xAB, 0x54, 0xA7, 0xC5, 0x15, 0x25, 0x5C, 0x39, 0x38, 0xBB, 0x10, 0xBC, 0x78, 0x4D, 0xC9, 0xB6, 0x7F, 0x07, 0x6E, 0x34, 0x1A, 0x73 },
+	{ 0x6A, 0xB9, 0x05, 0x7B, 0x97, 0x7E, 0xBC, 0x3C, 0xA4, 0xD4, 0xCE, 0x74, 0x50, 0x6C, 0x25, 0xCC, 0xCD, 0xC5, 0x66, 0x49, 0x7C, 0x45, 0x0B, 0x54, 0x15, 0xA3, 0x94, 0x86, 0xF8, 0x65, 0x7A, 0x03 },
+	{ 0x24, 0x06, 0x6D, 0xEE, 0xE0, 0xEC, 0xEE, 0x15, 0xA4, 0x5F, 0x0A, 0x32, 0x6D, 0x0F, 0x8D, 0xBC, 0x79, 0x76, 0x1E, 0xBB, 0x93, 0xCF, 0x8C, 0x03, 0x77, 0xAF, 0x44, 0x09, 0x78, 0xFC, 0xF9, 0x94 },
+	{ 0x20, 0x00, 0x0D, 0x3F, 0x66, 0xBA, 0x76, 0x86, 0x0D, 0x5A, 0x95, 0x06, 0x88, 0xB9, 0xAA, 0x0D, 0x76, 0xCF, 0xEA, 0x59, 0xB0, 0x05, 0xD8, 0x59, 0x91, 0x4B, 0x1A, 0x46, 0x65, 0x3A, 0x93, 0x9B },
+	{ 0xB9, 0x2D, 0xAA, 0x79, 0x60, 0x3E, 0x3B, 0xDB, 0xC3, 0xBF, 0xE0, 0xF4, 0x19, 0xE4, 0x09, 0xB2, 0xEA, 0x10, 0xDC, 0x43, 0x5B, 0xEE, 0xFE, 0x29, 0x59, 0xDA, 0x16, 0x89, 0x5D, 0x5D, 0xCA, 0x1C },
+	{ 0xE9, 0x47, 0x94, 0x87, 0x05, 0xB2, 0x06, 0xD5, 0x72, 0xB0, 0xE8, 0xF6, 0x2F, 0x66, 0xA6, 0x55, 0x1C, 0xBD, 0x6B, 0xC3, 0x05, 0xD2, 0x6C, 0xE7, 0x53, 0x9A, 0x12, 0xF9, 0xAA, 0xDF, 0x75, 0x71 },
+	{ 0x3D, 0x67, 0xC1, 0xB3, 0xF9, 0xB2, 0x39, 0x10, 0xE3, 0xD3, 0x5E, 0x6B, 0x0F, 0x2C, 0xCF, 0x44, 0xA0, 0xB5, 0x40, 0xA4, 0x5C, 0x18, 0xBA, 0x3C, 0x36, 0x26, 0x4D, 0xD4, 0x8E, 0x96, 0xAF, 0x6A },
+	{ 0xC7, 0x55, 0x8B, 0xAB, 0xDA, 0x04, 0xBC, 0xCB, 0x76, 0x4D, 0x0B, 0xBF, 0x33, 0x58, 0x42, 0x51, 0x41, 0x90, 0x2D, 0x22, 0x39, 0x1D, 0x9F, 0x8C, 0x59, 0x15, 0x9F, 0xEC, 0x9E, 0x49, 0xB1, 0x51 },
+	{ 0x0B, 0x73, 0x2B, 0xB0, 0x35, 0x67, 0x5A, 0x50, 0xFF, 0x58, 0xF2, 0xC2, 0x42, 0xE4, 0x71, 0x0A, 0xEC, 0xE6, 0x46, 0x70, 0x07, 0x9C, 0x13, 0x04, 0x4C, 0x79, 0xC9, 0xB7, 0x49, 0x1F, 0x70, 0x00 },
+	{ 0xD1, 0x20, 0xB5, 0xEF, 0x6D, 0x57, 0xEB, 0xF0, 0x6E, 0xAF, 0x96, 0xBC, 0x93, 0x3C, 0x96, 0x7B, 0x16, 0xCB, 0xE6, 0xE2, 0xBF, 0x00, 0x74, 0x1C, 0x30, 0xAA, 0x1C, 0x54, 0xBA, 0x64, 0x80, 0x1F },
+	{ 0x58, 0xD2, 0x12, 0xAD, 0x6F, 0x58, 0xAE, 0xF0, 0xF8, 0x01, 0x16, 0xB4, 0x41, 0xE5, 0x7F, 0x61, 0x95, 0xBF, 0xEF, 0x26, 0xB6, 0x14, 0x63, 0xED, 0xEC, 0x11, 0x83, 0xCD, 0xB0, 0x4F, 0xE7, 0x6D },
+	{ 0xB8, 0x83, 0x6F, 0x51, 0xD1, 0xE2, 0x9B, 0xDF, 0xDB, 0xA3, 0x25, 0x56, 0x53, 0x60, 0x26, 0x8B, 0x8F, 0xAD, 0x62, 0x74, 0x73, 0xED, 0xEC, 0xEF, 0x7E, 0xAE, 0xFE, 0xE8, 0x37, 0xC7, 0x40, 0x03 },
+	{ 0xC5, 0x47, 0xA3, 0xC1, 0x24, 0xAE, 0x56, 0x85, 0xFF, 0xA7, 0xB8, 0xED, 0xAF, 0x96, 0xEC, 0x86, 0xF8, 0xB2, 0xD0, 0xD5, 0x0C, 0xEE, 0x8B, 0xE3, 0xB1, 0xF0, 0xC7, 0x67, 0x63, 0x06, 0x9D, 0x9C },
+	{ 0x5D, 0x16, 0x8B, 0x76, 0x9A, 0x2F, 0x67, 0x85, 0x3D, 0x62, 0x95, 0xF7, 0x56, 0x8B, 0xE4, 0x0B, 0xB7, 0xA1, 0x6B, 0x8D, 0x65, 0xBA, 0x87, 0x63, 0x5D, 0x19, 0x78, 0xD2, 0xAB, 0x11, 0xBA, 0x2A },
+	{ 0xA2, 0xF6, 0x75, 0xDC, 0x73, 0x02, 0x63, 0x8C, 0xB6, 0x02, 0x01, 0x06, 0x4C, 0xA5, 0x50, 0x77, 0x71, 0x4D, 0x71, 0xFE, 0x09, 0x6A, 0x31, 0x5F, 0x2F, 0xE7, 0x40, 0x12, 0x77, 0xCA, 0xA5, 0xAF },
+	{ 0xC8, 0xAA, 0xB5, 0xCD, 0x01, 0x60, 0xAE, 0x78, 0xCD, 0x2E, 0x8A, 0xC5, 0xFB, 0x0E, 0x09, 0x3C, 0xDB, 0x5C, 0x4B, 0x60, 0x52, 0xA0, 0xA9, 0x7B, 0xB0, 0x42, 0x16, 0x82, 0x6F, 0xA7, 0xA4, 0x37 },
+	{ 0xFF, 0x68, 0xCA, 0x40, 0x35, 0xBF, 0xEB, 0x43, 0xFB, 0xF1, 0x45, 0xFD, 0xDD, 0x5E, 0x43, 0xF1, 0xCE, 0xA5, 0x4F, 0x11, 0xF7, 0xBE, 0xE1, 0x30, 0x58, 0xF0, 0x27, 0x32, 0x9A, 0x4A, 0x5F, 0xA4 },
+	{ 0x1D, 0x4E, 0x54, 0x87, 0xAE, 0x3C, 0x74, 0x0F, 0x2B, 0xA6, 0xE5, 0x41, 0xAC, 0x91, 0xBC, 0x2B, 0xFC, 0xD2, 0x99, 0x9C, 0x51, 0x8D, 0x80, 0x7B, 0x42, 0x67, 0x48, 0x80, 0x3A, 0x35, 0x0F, 0xD4 },
+	{ 0x6D, 0x24, 0x4E, 0x1A, 0x06, 0xCE, 0x4E, 0xF5, 0x78, 0xDD, 0x0F, 0x63, 0xAF, 0xF0, 0x93, 0x67, 0x06, 0x73, 0x51, 0x19, 0xCA, 0x9C, 0x8D, 0x22, 0xD8, 0x6C, 0x80, 0x14, 0x14, 0xAB, 0x97, 0x41 },
+	{ 0xDE, 0xCF, 0x73, 0x29, 0xDB, 0xCC, 0x82, 0x7B, 0x8F, 0xC5, 0x24, 0xC9, 0x43, 0x1E, 0x89, 0x98, 0x02, 0x9E, 0xCE, 0x12, 0xCE, 0x93, 0xB7, 0xB2, 0xF3, 0xE7, 0x69, 0xA9, 0x41, 0xFB, 0x8C, 0xEA },
+	{ 0x2F, 0xAF, 0xCC, 0x0F, 0x2E, 0x63, 0xCB, 0xD0, 0x77, 0x55, 0xBE, 0x7B, 0x75, 0xEC, 0xEA, 0x0A, 0xDF, 0xF9, 0xAA, 0x5E, 0xDE, 0x2A, 0x52, 0xFD, 0xAB, 0x4D, 0xFD, 0x03, 0x74, 0xCD, 0x48, 0x3F },
+	{ 0xAA, 0x85, 0x01, 0x0D, 0xD4, 0x6A, 0x54, 0x6B, 0x53, 0x5E, 0xF4, 0xCF, 0x5F, 0x07, 0xD6, 0x51, 0x61, 0xE8, 0x98, 0x28, 0xF3, 0xA7, 0x7D, 0xB7, 0xB9, 0xB5, 0x6F, 0x0D, 0xF5, 0x9A, 0xAE, 0x45 },
+	{ 0x07, 0xE8, 0xE1, 0xEE, 0x73, 0x2C, 0xB0, 0xD3, 0x56, 0xC9, 0xC0, 0xD1, 0x06, 0x9C, 0x89, 0xD1, 0x7A, 0xDF, 0x6A, 0x9A, 0x33, 0x4F, 0x74, 0x5E, 0xC7, 0x86, 0x73, 0x32, 0x54, 0x8C, 0xA8, 0xE9 },
+	{ 0x0E, 0x01, 0xE8, 0x1C, 0xAD, 0xA8, 0x16, 0x2B, 0xFD, 0x5F, 0x8A, 0x8C, 0x81, 0x8A, 0x6C, 0x69, 0xFE, 0xDF, 0x02, 0xCE, 0xB5, 0x20, 0x85, 0x23, 0xCB, 0xE5, 0x31, 0x3B, 0x89, 0xCA, 0x10, 0x53 },
+	{ 0x6B, 0xB6, 0xC6, 0x47, 0x26, 0x55, 0x08, 0x43, 0x99, 0x85, 0x2E, 0x00, 0x24, 0x9F, 0x8C, 0xB2, 0x47, 0x89, 0x6D, 0x39, 0x2B, 0x02, 0xD7, 0x3B, 0x7F, 0x0D, 0xD8, 0x18, 0xE1, 0xE2, 0x9B, 0x07 },
+	{ 0x42, 0xD4, 0x63, 0x6E, 0x20, 0x60, 0xF0, 0x8F, 0x41, 0xC8, 0x82, 0xE7, 0x6B, 0x39, 0x6B, 0x11, 0x2E, 0xF6, 0x27, 0xCC, 0x24, 0xC4, 0x3D, 0xD5, 0xF8, 0x3A, 0x1D, 0x1A, 0x7E, 0xAD, 0x71, 0x1A },
+	{ 0x48, 0x58, 0xC9, 0xA1, 0x88, 0xB0, 0x23, 0x4F, 0xB9, 0xA8, 0xD4, 0x7D, 0x0B, 0x41, 0x33, 0x65, 0x0A, 0x03, 0x0B, 0xD0, 0x61, 0x1B, 0x87, 0xC3, 0x89, 0x2E, 0x94, 0x95, 0x1F, 0x8D, 0xF8, 0x52 },
+	{ 0x3F, 0xAB, 0x3E, 0x36, 0x98, 0x8D, 0x44, 0x5A, 0x51, 0xC8, 0x78, 0x3E, 0x53, 0x1B, 0xE3, 0xA0, 0x2B, 0xE4, 0x0C, 0xD0, 0x47, 0x96, 0xCF, 0xB6, 0x1D, 0x40, 0x34, 0x74, 0x42, 0xD3, 0xF7, 0x94 },
+	{ 0xEB, 0xAB, 0xC4, 0x96, 0x36, 0xBD, 0x43, 0x3D, 0x2E, 0xC8, 0xF0, 0xE5, 0x18, 0x73, 0x2E, 0xF8, 0xFA, 0x21, 0xD4, 0xD0, 0x71, 0xCC, 0x3B, 0xC4, 0x6C, 0xD7, 0x9F, 0xA3, 0x8A, 0x28, 0xB8, 0x10 },
+	{ 0xA1, 0xD0, 0x34, 0x35, 0x23, 0xB8, 0x93, 0xFC, 0xA8, 0x4F, 0x47, 0xFE, 0xB4, 0xA6, 0x4D, 0x35, 0x0A, 0x17, 0xD8, 0xEE, 0xF5, 0x49, 0x7E, 0xCE, 0x69, 0x7D, 0x02, 0xD7, 0x91, 0x78, 0xB5, 0x91 },
+	{ 0x26, 0x2E, 0xBF, 0xD9, 0x13, 0x0B, 0x7D, 0x28, 0x76, 0x0D, 0x08, 0xEF, 0x8B, 0xFD, 0x3B, 0x86, 0xCD, 0xD3, 0xB2, 0x11, 0x3D, 0x2C, 0xAE, 0xF7, 0xEA, 0x95, 0x1A, 0x30, 0x3D, 0xFA, 0x38, 0x46 },
+	{ 0xF7, 0x61, 0x58, 0xED, 0xD5, 0x0A, 0x15, 0x4F, 0xA7, 0x82, 0x03, 0xED, 0x23, 0x62, 0x93, 0x2F, 0xCB, 0x82, 0x53, 0xAA, 0xE3, 0x78, 0x90, 0x3E, 0xDE, 0xD1, 0xE0, 0x3F, 0x70, 0x21, 0xA2, 0x57 },
+	{ 0x26, 0x17, 0x8E, 0x95, 0x0A, 0xC7, 0x22, 0xF6, 0x7A, 0xE5, 0x6E, 0x57, 0x1B, 0x28, 0x4C, 0x02, 0x07, 0x68, 0x4A, 0x63, 0x34, 0xA1, 0x77, 0x48, 0xA9, 0x4D, 0x26, 0x0B, 0xC5, 0xF5, 0x52, 0x74 },
+	{ 0xC3, 0x78, 0xD1, 0xE4, 0x93, 0xB4, 0x0E, 0xF1, 0x1F, 0xE6, 0xA1, 0x5D, 0x9C, 0x27, 0x37, 0xA3, 0x78, 0x09, 0x63, 0x4C, 0x5A, 0xBA, 0xD5, 0xB3, 0x3D, 0x7E, 0x39, 0x3B, 0x4A, 0xE0, 0x5D, 0x03 },
+	{ 0x98, 0x4B, 0xD8, 0x37, 0x91, 0x01, 0xBE, 0x8F, 0xD8, 0x06, 0x12, 0xD8, 0xEA, 0x29, 0x59, 0xA7, 0x86, 0x5E, 0xC9, 0x71, 0x85, 0x23, 0x55, 0x01, 0x07, 0xAE, 0x39, 0x38, 0xDF, 0x32, 0x01, 0x1B },
+	{ 0xC6, 0xF2, 0x5A, 0x81, 0x2A, 0x14, 0x48, 0x58, 0xAC, 0x5C, 0xED, 0x37, 0xA9, 0x3A, 0x9F, 0x47, 0x59, 0xBA, 0x0B, 0x1C, 0x0F, 0xDC, 0x43, 0x1D, 0xCE, 0x35, 0xF9, 0xEC, 0x1F, 0x1F, 0x4A, 0x99 },
+	{ 0x92, 0x4C, 0x75, 0xC9, 0x44, 0x24, 0xFF, 0x75, 0xE7, 0x4B, 0x8B, 0x4E, 0x94, 0x35, 0x89, 0x58, 0xB0, 0x27, 0xB1, 0x71, 0xDF, 0x5E, 0x57, 0x89, 0x9A, 0xD0, 0xD4, 0xDA, 0xC3, 0x73, 0x53, 0xB6 },
+	{ 0x0A, 0xF3, 0x58, 0x92, 0xA6, 0x3F, 0x45, 0x93, 0x1F, 0x68, 0x46, 0xED, 0x19, 0x03, 0x61, 0xCD, 0x07, 0x30, 0x89, 0xE0, 0x77, 0x16, 0x57, 0x14, 0xB5, 0x0B, 0x81, 0xA2, 0xE3, 0xDD, 0x9B, 0xA1 },
+	{ 0xCC, 0x80, 0xCE, 0xFB, 0x26, 0xC3, 0xB2, 0xB0, 0xDA, 0xEF, 0x23, 0x3E, 0x60, 0x6D, 0x5F, 0xFC, 0x80, 0xFA, 0x17, 0x42, 0x7D, 0x18, 0xE3, 0x04, 0x89, 0x67, 0x3E, 0x06, 0xEF, 0x4B, 0x87, 0xF7 },
+	{ 0xC2, 0xF8, 0xC8, 0x11, 0x74, 0x47, 0xF3, 0x97, 0x8B, 0x08, 0x18, 0xDC, 0xF6, 0xF7, 0x01, 0x16, 0xAC, 0x56, 0xFD, 0x18, 0x4D, 0xD1, 0x27, 0x84, 0x94, 0xE1, 0x03, 0xFC, 0x6D, 0x74, 0xA8, 0x87 },
+	{ 0xBD, 0xEC, 0xF6, 0xBF, 0xC1, 0xBA, 0x0D, 0xF6, 0xE8, 0x62, 0xC8, 0x31, 0x99, 0x22, 0x07, 0x79, 0x6A, 0xCC, 0x79, 0x79, 0x68, 0x35, 0x88, 0x28, 0xC0, 0x6E, 0x7A, 0x51, 0xE0, 0x90, 0x09, 0x8F },
+	{ 0x24, 0xD1, 0xA2, 0x6E, 0x3D, 0xAB, 0x02, 0xFE, 0x45, 0x72, 0xD2, 0xAA, 0x7D, 0xBD, 0x3E, 0xC3, 0x0F, 0x06, 0x93, 0xDB, 0x26, 0xF2, 0x73, 0xD0, 0xAB, 0x2C, 0xB0, 0xC1, 0x3B, 0x5E, 0x64, 0x51 },
+	{ 0xEC, 0x56, 0xF5, 0x8B, 0x09, 0x29, 0x9A, 0x30, 0x0B, 0x14, 0x05, 0x65, 0xD7, 0xD3, 0xE6, 0x87, 0x82, 0xB6, 0xE2, 0xFB, 0xEB, 0x4B, 0x7E, 0xA9, 0x7A, 0xC0, 0x57, 0x98, 0x90, 0x61, 0xDD, 0x3F },
+	{ 0x11, 0xA4, 0x37, 0xC1, 0xAB, 0xA3, 0xC1, 0x19, 0xDD, 0xFA, 0xB3, 0x1B, 0x3E, 0x8C, 0x84, 0x1D, 0xEE, 0xEB, 0x91, 0x3E, 0xF5, 0x7F, 0x7E, 0x48, 0xF2, 0xC9, 0xCF, 0x5A, 0x28, 0xFA, 0x42, 0xBC },
+	{ 0x53, 0xC7, 0xE6, 0x11, 0x4B, 0x85, 0x0A, 0x2C, 0xB4, 0x96, 0xC9, 0xB3, 0xC6, 0x9A, 0x62, 0x3E, 0xAE, 0xA2, 0xCB, 0x1D, 0x33, 0xDD, 0x81, 0x7E, 0x47, 0x65, 0xED, 0xAA, 0x68, 0x23, 0xC2, 0x28 },
+	{ 0x15, 0x4C, 0x3E, 0x96, 0xFE, 0xE5, 0xDB, 0x14, 0xF8, 0x77, 0x3E, 0x18, 0xAF, 0x14, 0x85, 0x79, 0x13, 0x50, 0x9D, 0xA9, 0x99, 0xB4, 0x6C, 0xDD, 0x3D, 0x4C, 0x16, 0x97, 0x60, 0xC8, 0x3A, 0xD2 },
+	{ 0x40, 0xB9, 0x91, 0x6F, 0x09, 0x3E, 0x02, 0x7A, 0x87, 0x86, 0x64, 0x18, 0x18, 0x92, 0x06, 0x20, 0x47, 0x2F, 0xBC, 0xF6, 0x8F, 0x70, 0x1D, 0x1B, 0x68, 0x06, 0x32, 0xE6, 0x99, 0x6B, 0xDE, 0xD3 },
+	{ 0x24, 0xC4, 0xCB, 0xBA, 0x07, 0x11, 0x98, 0x31, 0xA7, 0x26, 0xB0, 0x53, 0x05, 0xD9, 0x6D, 0xA0, 0x2F, 0xF8, 0xB1, 0x48, 0xF0, 0xDA, 0x44, 0x0F, 0xE2, 0x33, 0xBC, 0xAA, 0x32, 0xC7, 0x2F, 0x6F },
+	{ 0x5D, 0x20, 0x15, 0x10, 0x25, 0x00, 0x20, 0xB7, 0x83, 0x68, 0x96, 0x88, 0xAB, 0xBF, 0x8E, 0xCF, 0x25, 0x94, 0xA9, 0x6A, 0x08, 0xF2, 0xBF, 0xEC, 0x6C, 0xE0, 0x57, 0x44, 0x65, 0xDD, 0xED, 0x71 },
+	{ 0x04, 0x3B, 0x97, 0xE3, 0x36, 0xEE, 0x6F, 0xDB, 0xBE, 0x2B, 0x50, 0xF2, 0x2A, 0xF8, 0x32, 0x75, 0xA4, 0x08, 0x48, 0x05, 0xD2, 0xD5, 0x64, 0x59, 0x62, 0x45, 0x4B, 0x6C, 0x9B, 0x80, 0x53, 0xA0 },
+	{ 0x56, 0x48, 0x35, 0xCB, 0xAE, 0xA7, 0x74, 0x94, 0x85, 0x68, 0xBE, 0x36, 0xCF, 0x52, 0xFC, 0xDD, 0x83, 0x93, 0x4E, 0xB0, 0xA2, 0x75, 0x12, 0xDB, 0xE3, 0xE2, 0xDB, 0x47, 0xB9, 0xE6, 0x63, 0x5A },
+	{ 0xF2, 0x1C, 0x33, 0xF4, 0x7B, 0xDE, 0x40, 0xA2, 0xA1, 0x01, 0xC9, 0xCD, 0xE8, 0x02, 0x7A, 0xAF, 0x61, 0xA3, 0x13, 0x7D, 0xE2, 0x42, 0x2B, 0x30, 0x03, 0x5A, 0x04, 0xC2, 0x70, 0x89, 0x41, 0x83 },
+	{ 0x9D, 0xB0, 0xEF, 0x74, 0xE6, 0x6C, 0xBB, 0x84, 0x2E, 0xB0, 0xE0, 0x73, 0x43, 0xA0, 0x3C, 0x5C, 0x56, 0x7E, 0x37, 0x2B, 0x3F, 0x23, 0xB9, 0x43, 0xC7, 0x88, 0xA4, 0xF2, 0x50, 0xF6, 0x78, 0x91 },
+	{ 0xAB, 0x8D, 0x08, 0x65, 0x5F, 0xF1, 0xD3, 0xFE, 0x87, 0x58, 0xD5, 0x62, 0x23, 0x5F, 0xD2, 0x3E, 0x7C, 0xF9, 0xDC, 0xAA, 0xD6, 0x58, 0x87, 0x2A, 0x49, 0xE5, 0xD3, 0x18, 0x3B, 0x6C, 0xCE, 0xBD },
+	{ 0x6F, 0x27, 0xF7, 0x7E, 0x7B, 0xCF, 0x46, 0xA1, 0xE9, 0x63, 0xAD, 0xE0, 0x30, 0x97, 0x33, 0x54, 0x30, 0x31, 0xDC, 0xCD, 0xD4, 0x7C, 0xAA, 0xC1, 0x74, 0xD7, 0xD2, 0x7C, 0xE8, 0x07, 0x7E, 0x8B },
+	{ 0xE3, 0xCD, 0x54, 0xDA, 0x7E, 0x44, 0x4C, 0xAA, 0x62, 0x07, 0x56, 0x95, 0x25, 0xA6, 0x70, 0xEB, 0xAE, 0x12, 0x78, 0xDE, 0x4E, 0x3F, 0xE2, 0x68, 0x4B, 0x3E, 0x33, 0xF5, 0xEF, 0x90, 0xCC, 0x1B },
+	{ 0xB2, 0xC3, 0xE3, 0x3A, 0x51, 0xD2, 0x2C, 0x4C, 0x08, 0xFC, 0x09, 0x89, 0xC8, 0x73, 0xC9, 0xCC, 0x41, 0x50, 0x57, 0x9B, 0x1E, 0x61, 0x63, 0xFA, 0x69, 0x4A, 0xD5, 0x1D, 0x53, 0xD7, 0x12, 0xDC },
+	{ 0xBE, 0x7F, 0xDA, 0x98, 0x3E, 0x13, 0x18, 0x9B, 0x4C, 0x77, 0xE0, 0xA8, 0x09, 0x20, 0xB6, 0xE0, 0xE0, 0xEA, 0x80, 0xC3, 0xB8, 0x4D, 0xBE, 0x7E, 0x71, 0x17, 0xD2, 0x53, 0xF4, 0x81, 0x12, 0xF4 },
+	{ 0xB6, 0x00, 0x8C, 0x28, 0xFA, 0xE0, 0x8A, 0xA4, 0x27, 0xE5, 0xBD, 0x3A, 0xAD, 0x36, 0xF1, 0x00, 0x21, 0xF1, 0x6C, 0x77, 0xCF, 0xEA, 0xBE, 0xD0, 0x7F, 0x97, 0xCC, 0x7D, 0xC1, 0xF1, 0x28, 0x4A },
+	{ 0x6E, 0x4E, 0x67, 0x60, 0xC5, 0x38, 0xF2, 0xE9, 0x7B, 0x3A, 0xDB, 0xFB, 0xBC, 0xDE, 0x57, 0xF8, 0x96, 0x6B, 0x7E, 0xA8, 0xFC, 0xB5, 0xBF, 0x7E, 0xFE, 0xC9, 0x13, 0xFD, 0x2A, 0x2B, 0x0C, 0x55 },
+	{ 0x4A, 0xE5, 0x1F, 0xD1, 0x83, 0x4A, 0xA5, 0xBD, 0x9A, 0x6F, 0x7E, 0xC3, 0x9F, 0xC6, 0x63, 0x33, 0x8D, 0xC5, 0xD2, 0xE2, 0x07, 0x61, 0x56, 0x6D, 0x90, 0xCC, 0x68, 0xB1, 0xCB, 0x87, 0x5E, 0xD8 },
+	{ 0xB6, 0x73, 0xAA, 0xD7, 0x5A, 0xB1, 0xFD, 0xB5, 0x40, 0x1A, 0xBF, 0xA1, 0xBF, 0x89, 0xF3, 0xAD, 0xD2, 0xEB, 0xC4, 0x68, 0xDF, 0x36, 0x24, 0xA4, 0x78, 0xF4, 0xFE, 0x85, 0x9D, 0x8D, 0x55, 0xE2 },
+	{ 0x13, 0xC9, 0x47, 0x1A, 0x98, 0x55, 0x91, 0x35, 0x39, 0x83, 0x66, 0x60, 0x39, 0x8D, 0xA0, 0xF3, 0xF9, 0x9A, 0xDA, 0x08, 0x47, 0x9C, 0x69, 0xD1, 0xB7, 0xFC, 0xAA, 0x34, 0x61, 0xDD, 0x7E, 0x59 },
+	{ 0x2C, 0x11, 0xF4, 0xA7, 0xF9, 0x9A, 0x1D, 0x23, 0xA5, 0x8B, 0xB6, 0x36, 0x35, 0x0F, 0xE8, 0x49, 0xF2, 0x9C, 0xBA, 0xC1, 0xB2, 0xA1, 0x11, 0x2D, 0x9F, 0x1E, 0xD5, 0xBC, 0x5B, 0x31, 0x3C, 0xCD },
+	{ 0xC7, 0xD3, 0xC0, 0x70, 0x6B, 0x11, 0xAE, 0x74, 0x1C, 0x05, 0xA1, 0xEF, 0x15, 0x0D, 0xD6, 0x5B, 0x54, 0x94, 0xD6, 0xD5, 0x4C, 0x9A, 0x86, 0xE2, 0x61, 0x78, 0x54, 0xE6, 0xAE, 0xEE, 0xBB, 0xD9 },
+	{ 0x19, 0x4E, 0x10, 0xC9, 0x38, 0x93, 0xAF, 0xA0, 0x64, 0xC3, 0xAC, 0x04, 0xC0, 0xDD, 0x80, 0x8D, 0x79, 0x1C, 0x3D, 0x4B, 0x75, 0x56, 0xE8, 0x9D, 0x8D, 0x9C, 0xB2, 0x25, 0xC4, 0xB3, 0x33, 0x39 },
+	{ 0x6F, 0xC4, 0x98, 0x8B, 0x8F, 0x78, 0x54, 0x6B, 0x16, 0x88, 0x99, 0x18, 0x45, 0x90, 0x8F, 0x13, 0x4B, 0x6A, 0x48, 0x2E, 0x69, 0x94, 0xB3, 0xD4, 0x83, 0x17, 0xBF, 0x08, 0xDB, 0x29, 0x21, 0x85 },
+	{ 0x56, 0x65, 0xBE, 0xB8, 0xB0, 0x95, 0x55, 0x25, 0x81, 0x3B, 0x59, 0x81, 0xCD, 0x14, 0x2E, 0xD4, 0xD0, 0x3F, 0xBA, 0x38, 0xA6, 0xF3, 0xE5, 0xAD, 0x26, 0x8E, 0x0C, 0xC2, 0x70, 0xD1, 0xCD, 0x11 },
+	{ 0xB8, 0x83, 0xD6, 0x8F, 0x5F, 0xE5, 0x19, 0x36, 0x43, 0x1B, 0xA4, 0x25, 0x67, 0x38, 0x05, 0x3B, 0x1D, 0x04, 0x26, 0xD4, 0xCB, 0x64, 0xB1, 0x6E, 0x83, 0xBA, 0xDC, 0x5E, 0x9F, 0xBE, 0x3B, 0x81 },
+	{ 0x53, 0xE7, 0xB2, 0x7E, 0xA5, 0x9C, 0x2F, 0x6D, 0xBB, 0x50, 0x76, 0x9E, 0x43, 0x55, 0x4D, 0xF3, 0x5A, 0xF8, 0x9F, 0x48, 0x22, 0xD0, 0x46, 0x6B, 0x00, 0x7D, 0xD6, 0xF6, 0xDE, 0xAF, 0xFF, 0x02 },
+	{ 0x1F, 0x1A, 0x02, 0x29, 0xD4, 0x64, 0x0F, 0x01, 0x90, 0x15, 0x88, 0xD9, 0xDE, 0xC2, 0x2D, 0x13, 0xFC, 0x3E, 0xB3, 0x4A, 0x61, 0xB3, 0x29, 0x38, 0xEF, 0xBF, 0x53, 0x34, 0xB2, 0x80, 0x0A, 0xFA },
+	{ 0xC2, 0xB4, 0x05, 0xAF, 0xA0, 0xFA, 0x66, 0x68, 0x85, 0x2A, 0xEE, 0x4D, 0x88, 0x04, 0x08, 0x53, 0xFA, 0xB8, 0x00, 0xE7, 0x2B, 0x57, 0x58, 0x14, 0x18, 0xE5, 0x50, 0x6F, 0x21, 0x4C, 0x7D, 0x1F },
+	{ 0xC0, 0x8A, 0xA1, 0xC2, 0x86, 0xD7, 0x09, 0xFD, 0xC7, 0x47, 0x37, 0x44, 0x97, 0x71, 0x88, 0xC8, 0x95, 0xBA, 0x01, 0x10, 0x14, 0x24, 0x7E, 0x4E, 0xFA, 0x8D, 0x07, 0xE7, 0x8F, 0xEC, 0x69, 0x5C },
+	{ 0xF0, 0x3F, 0x57, 0x89, 0xD3, 0x33, 0x6B, 0x80, 0xD0, 0x02, 0xD5, 0x9F, 0xDF, 0x91, 0x8B, 0xDB, 0x77, 0x5B, 0x00, 0x95, 0x6E, 0xD5, 0x52, 0x8E, 0x86, 0xAA, 0x99, 0x4A, 0xCB, 0x38, 0xFE, 0x2D }
+};
+
+static const u8 blake2s_keyed_testvecs[][BLAKE2S_OUTBYTES] __initconst = {
+	{ 0x48, 0xA8, 0x99, 0x7D, 0xA4, 0x07, 0x87, 0x6B, 0x3D, 0x79, 0xC0, 0xD9, 0x23, 0x25, 0xAD, 0x3B, 0x89, 0xCB, 0xB7, 0x54, 0xD8, 0x6A, 0xB7, 0x1A, 0xEE, 0x04, 0x7A, 0xD3, 0x45, 0xFD, 0x2C, 0x49 },
+	{ 0x40, 0xD1, 0x5F, 0xEE, 0x7C, 0x32, 0x88, 0x30, 0x16, 0x6A, 0xC3, 0xF9, 0x18, 0x65, 0x0F, 0x80, 0x7E, 0x7E, 0x01, 0xE1, 0x77, 0x25, 0x8C, 0xDC, 0x0A, 0x39, 0xB1, 0x1F, 0x59, 0x80, 0x66, 0xF1 },
+	{ 0x6B, 0xB7, 0x13, 0x00, 0x64, 0x4C, 0xD3, 0x99, 0x1B, 0x26, 0xCC, 0xD4, 0xD2, 0x74, 0xAC, 0xD1, 0xAD, 0xEA, 0xB8, 0xB1, 0xD7, 0x91, 0x45, 0x46, 0xC1, 0x19, 0x8B, 0xBE, 0x9F, 0xC9, 0xD8, 0x03 },
+	{ 0x1D, 0x22, 0x0D, 0xBE, 0x2E, 0xE1, 0x34, 0x66, 0x1F, 0xDF, 0x6D, 0x9E, 0x74, 0xB4, 0x17, 0x04, 0x71, 0x05, 0x56, 0xF2, 0xF6, 0xE5, 0xA0, 0x91, 0xB2, 0x27, 0x69, 0x74, 0x45, 0xDB, 0xEA, 0x6B },
+	{ 0xF6, 0xC3, 0xFB, 0xAD, 0xB4, 0xCC, 0x68, 0x7A, 0x00, 0x64, 0xA5, 0xBE, 0x6E, 0x79, 0x1B, 0xEC, 0x63, 0xB8, 0x68, 0xAD, 0x62, 0xFB, 0xA6, 0x1B, 0x37, 0x57, 0xEF, 0x9C, 0xA5, 0x2E, 0x05, 0xB2 },
+	{ 0x49, 0xC1, 0xF2, 0x11, 0x88, 0xDF, 0xD7, 0x69, 0xAE, 0xA0, 0xE9, 0x11, 0xDD, 0x6B, 0x41, 0xF1, 0x4D, 0xAB, 0x10, 0x9D, 0x2B, 0x85, 0x97, 0x7A, 0xA3, 0x08, 0x8B, 0x5C, 0x70, 0x7E, 0x85, 0x98 },
+	{ 0xFD, 0xD8, 0x99, 0x3D, 0xCD, 0x43, 0xF6, 0x96, 0xD4, 0x4F, 0x3C, 0xEA, 0x0F, 0xF3, 0x53, 0x45, 0x23, 0x4E, 0xC8, 0xEE, 0x08, 0x3E, 0xB3, 0xCA, 0xDA, 0x01, 0x7C, 0x7F, 0x78, 0xC1, 0x71, 0x43 },
+	{ 0xE6, 0xC8, 0x12, 0x56, 0x37, 0x43, 0x8D, 0x09, 0x05, 0xB7, 0x49, 0xF4, 0x65, 0x60, 0xAC, 0x89, 0xFD, 0x47, 0x1C, 0xF8, 0x69, 0x2E, 0x28, 0xFA, 0xB9, 0x82, 0xF7, 0x3F, 0x01, 0x9B, 0x83, 0xA9 },
+	{ 0x19, 0xFC, 0x8C, 0xA6, 0x97, 0x9D, 0x60, 0xE6, 0xED, 0xD3, 0xB4, 0x54, 0x1E, 0x2F, 0x96, 0x7C, 0xED, 0x74, 0x0D, 0xF6, 0xEC, 0x1E, 0xAE, 0xBB, 0xFE, 0x81, 0x38, 0x32, 0xE9, 0x6B, 0x29, 0x74 },
+	{ 0xA6, 0xAD, 0x77, 0x7C, 0xE8, 0x81, 0xB5, 0x2B, 0xB5, 0xA4, 0x42, 0x1A, 0xB6, 0xCD, 0xD2, 0xDF, 0xBA, 0x13, 0xE9, 0x63, 0x65, 0x2D, 0x4D, 0x6D, 0x12, 0x2A, 0xEE, 0x46, 0x54, 0x8C, 0x14, 0xA7 },
+	{ 0xF5, 0xC4, 0xB2, 0xBA, 0x1A, 0x00, 0x78, 0x1B, 0x13, 0xAB, 0xA0, 0x42, 0x52, 0x42, 0xC6, 0x9C, 0xB1, 0x55, 0x2F, 0x3F, 0x71, 0xA9, 0xA3, 0xBB, 0x22, 0xB4, 0xA6, 0xB4, 0x27, 0x7B, 0x46, 0xDD },
+	{ 0xE3, 0x3C, 0x4C, 0x9B, 0xD0, 0xCC, 0x7E, 0x45, 0xC8, 0x0E, 0x65, 0xC7, 0x7F, 0xA5, 0x99, 0x7F, 0xEC, 0x70, 0x02, 0x73, 0x85, 0x41, 0x50, 0x9E, 0x68, 0xA9, 0x42, 0x38, 0x91, 0xE8, 0x22, 0xA3 },
+	{ 0xFB, 0xA1, 0x61, 0x69, 0xB2, 0xC3, 0xEE, 0x10, 0x5B, 0xE6, 0xE1, 0xE6, 0x50, 0xE5, 0xCB, 0xF4, 0x07, 0x46, 0xB6, 0x75, 0x3D, 0x03, 0x6A, 0xB5, 0x51, 0x79, 0x01, 0x4A, 0xD7, 0xEF, 0x66, 0x51 },
+	{ 0xF5, 0xC4, 0xBE, 0xC6, 0xD6, 0x2F, 0xC6, 0x08, 0xBF, 0x41, 0xCC, 0x11, 0x5F, 0x16, 0xD6, 0x1C, 0x7E, 0xFD, 0x3F, 0xF6, 0xC6, 0x56, 0x92, 0xBB, 0xE0, 0xAF, 0xFF, 0xB1, 0xFE, 0xDE, 0x74, 0x75 },
+	{ 0xA4, 0x86, 0x2E, 0x76, 0xDB, 0x84, 0x7F, 0x05, 0xBA, 0x17, 0xED, 0xE5, 0xDA, 0x4E, 0x7F, 0x91, 0xB5, 0x92, 0x5C, 0xF1, 0xAD, 0x4B, 0xA1, 0x27, 0x32, 0xC3, 0x99, 0x57, 0x42, 0xA5, 0xCD, 0x6E },
+	{ 0x65, 0xF4, 0xB8, 0x60, 0xCD, 0x15, 0xB3, 0x8E, 0xF8, 0x14, 0xA1, 0xA8, 0x04, 0x31, 0x4A, 0x55, 0xBE, 0x95, 0x3C, 0xAA, 0x65, 0xFD, 0x75, 0x8A, 0xD9, 0x89, 0xFF, 0x34, 0xA4, 0x1C, 0x1E, 0xEA },
+	{ 0x19, 0xBA, 0x23, 0x4F, 0x0A, 0x4F, 0x38, 0x63, 0x7D, 0x18, 0x39, 0xF9, 0xD9, 0xF7, 0x6A, 0xD9, 0x1C, 0x85, 0x22, 0x30, 0x71, 0x43, 0xC9, 0x7D, 0x5F, 0x93, 0xF6, 0x92, 0x74, 0xCE, 0xC9, 0xA7 },
+	{ 0x1A, 0x67, 0x18, 0x6C, 0xA4, 0xA5, 0xCB, 0x8E, 0x65, 0xFC, 0xA0, 0xE2, 0xEC, 0xBC, 0x5D, 0xDC, 0x14, 0xAE, 0x38, 0x1B, 0xB8, 0xBF, 0xFE, 0xB9, 0xE0, 0xA1, 0x03, 0x44, 0x9E, 0x3E, 0xF0, 0x3C },
+	{ 0xAF, 0xBE, 0xA3, 0x17, 0xB5, 0xA2, 0xE8, 0x9C, 0x0B, 0xD9, 0x0C, 0xCF, 0x5D, 0x7F, 0xD0, 0xED, 0x57, 0xFE, 0x58, 0x5E, 0x4B, 0xE3, 0x27, 0x1B, 0x0A, 0x6B, 0xF0, 0xF5, 0x78, 0x6B, 0x0F, 0x26 },
+	{ 0xF1, 0xB0, 0x15, 0x58, 0xCE, 0x54, 0x12, 0x62, 0xF5, 0xEC, 0x34, 0x29, 0x9D, 0x6F, 0xB4, 0x09, 0x00, 0x09, 0xE3, 0x43, 0x4B, 0xE2, 0xF4, 0x91, 0x05, 0xCF, 0x46, 0xAF, 0x4D, 0x2D, 0x41, 0x24 },
+	{ 0x13, 0xA0, 0xA0, 0xC8, 0x63, 0x35, 0x63, 0x5E, 0xAA, 0x74, 0xCA, 0x2D, 0x5D, 0x48, 0x8C, 0x79, 0x7B, 0xBB, 0x4F, 0x47, 0xDC, 0x07, 0x10, 0x50, 0x15, 0xED, 0x6A, 0x1F, 0x33, 0x09, 0xEF, 0xCE },
+	{ 0x15, 0x80, 0xAF, 0xEE, 0xBE, 0xBB, 0x34, 0x6F, 0x94, 0xD5, 0x9F, 0xE6, 0x2D, 0xA0, 0xB7, 0x92, 0x37, 0xEA, 0xD7, 0xB1, 0x49, 0x1F, 0x56, 0x67, 0xA9, 0x0E, 0x45, 0xED, 0xF6, 0xCA, 0x8B, 0x03 },
+	{ 0x20, 0xBE, 0x1A, 0x87, 0x5B, 0x38, 0xC5, 0x73, 0xDD, 0x7F, 0xAA, 0xA0, 0xDE, 0x48, 0x9D, 0x65, 0x5C, 0x11, 0xEF, 0xB6, 0xA5, 0x52, 0x69, 0x8E, 0x07, 0xA2, 0xD3, 0x31, 0xB5, 0xF6, 0x55, 0xC3 },
+	{ 0xBE, 0x1F, 0xE3, 0xC4, 0xC0, 0x40, 0x18, 0xC5, 0x4C, 0x4A, 0x0F, 0x6B, 0x9A, 0x2E, 0xD3, 0xC5, 0x3A, 0xBE, 0x3A, 0x9F, 0x76, 0xB4, 0xD2, 0x6D, 0xE5, 0x6F, 0xC9, 0xAE, 0x95, 0x05, 0x9A, 0x99 },
+	{ 0xE3, 0xE3, 0xAC, 0xE5, 0x37, 0xEB, 0x3E, 0xDD, 0x84, 0x63, 0xD9, 0xAD, 0x35, 0x82, 0xE1, 0x3C, 0xF8, 0x65, 0x33, 0xFF, 0xDE, 0x43, 0xD6, 0x68, 0xDD, 0x2E, 0x93, 0xBB, 0xDB, 0xD7, 0x19, 0x5A },
+	{ 0x11, 0x0C, 0x50, 0xC0, 0xBF, 0x2C, 0x6E, 0x7A, 0xEB, 0x7E, 0x43, 0x5D, 0x92, 0xD1, 0x32, 0xAB, 0x66, 0x55, 0x16, 0x8E, 0x78, 0xA2, 0xDE, 0xCD, 0xEC, 0x33, 0x30, 0x77, 0x76, 0x84, 0xD9, 0xC1 },
+	{ 0xE9, 0xBA, 0x8F, 0x50, 0x5C, 0x9C, 0x80, 0xC0, 0x86, 0x66, 0xA7, 0x01, 0xF3, 0x36, 0x7E, 0x6C, 0xC6, 0x65, 0xF3, 0x4B, 0x22, 0xE7, 0x3C, 0x3C, 0x04, 0x17, 0xEB, 0x1C, 0x22, 0x06, 0x08, 0x2F },
+	{ 0x26, 0xCD, 0x66, 0xFC, 0xA0, 0x23, 0x79, 0xC7, 0x6D, 0xF1, 0x23, 0x17, 0x05, 0x2B, 0xCA, 0xFD, 0x6C, 0xD8, 0xC3, 0xA7, 0xB8, 0x90, 0xD8, 0x05, 0xF3, 0x6C, 0x49, 0x98, 0x97, 0x82, 0x43, 0x3A },
+	{ 0x21, 0x3F, 0x35, 0x96, 0xD6, 0xE3, 0xA5, 0xD0, 0xE9, 0x93, 0x2C, 0xD2, 0x15, 0x91, 0x46, 0x01, 0x5E, 0x2A, 0xBC, 0x94, 0x9F, 0x47, 0x29, 0xEE, 0x26, 0x32, 0xFE, 0x1E, 0xDB, 0x78, 0xD3, 0x37 },
+	{ 0x10, 0x15, 0xD7, 0x01, 0x08, 0xE0, 0x3B, 0xE1, 0xC7, 0x02, 0xFE, 0x97, 0x25, 0x36, 0x07, 0xD1, 0x4A, 0xEE, 0x59, 0x1F, 0x24, 0x13, 0xEA, 0x67, 0x87, 0x42, 0x7B, 0x64, 0x59, 0xFF, 0x21, 0x9A },
+	{ 0x3C, 0xA9, 0x89, 0xDE, 0x10, 0xCF, 0xE6, 0x09, 0x90, 0x94, 0x72, 0xC8, 0xD3, 0x56, 0x10, 0x80, 0x5B, 0x2F, 0x97, 0x77, 0x34, 0xCF, 0x65, 0x2C, 0xC6, 0x4B, 0x3B, 0xFC, 0x88, 0x2D, 0x5D, 0x89 },
+	{ 0xB6, 0x15, 0x6F, 0x72, 0xD3, 0x80, 0xEE, 0x9E, 0xA6, 0xAC, 0xD1, 0x90, 0x46, 0x4F, 0x23, 0x07, 0xA5, 0xC1, 0x79, 0xEF, 0x01, 0xFD, 0x71, 0xF9, 0x9F, 0x2D, 0x0F, 0x7A, 0x57, 0x36, 0x0A, 0xEA },
+	{ 0xC0, 0x3B, 0xC6, 0x42, 0xB2, 0x09, 0x59, 0xCB, 0xE1, 0x33, 0xA0, 0x30, 0x3E, 0x0C, 0x1A, 0xBF, 0xF3, 0xE3, 0x1E, 0xC8, 0xE1, 0xA3, 0x28, 0xEC, 0x85, 0x65, 0xC3, 0x6D, 0xEC, 0xFF, 0x52, 0x65 },
+	{ 0x2C, 0x3E, 0x08, 0x17, 0x6F, 0x76, 0x0C, 0x62, 0x64, 0xC3, 0xA2, 0xCD, 0x66, 0xFE, 0xC6, 0xC3, 0xD7, 0x8D, 0xE4, 0x3F, 0xC1, 0x92, 0x45, 0x7B, 0x2A, 0x4A, 0x66, 0x0A, 0x1E, 0x0E, 0xB2, 0x2B },
+	{ 0xF7, 0x38, 0xC0, 0x2F, 0x3C, 0x1B, 0x19, 0x0C, 0x51, 0x2B, 0x1A, 0x32, 0xDE, 0xAB, 0xF3, 0x53, 0x72, 0x8E, 0x0E, 0x9A, 0xB0, 0x34, 0x49, 0x0E, 0x3C, 0x34, 0x09, 0x94, 0x6A, 0x97, 0xAE, 0xEC },
+	{ 0x8B, 0x18, 0x80, 0xDF, 0x30, 0x1C, 0xC9, 0x63, 0x41, 0x88, 0x11, 0x08, 0x89, 0x64, 0x83, 0x92, 0x87, 0xFF, 0x7F, 0xE3, 0x1C, 0x49, 0xEA, 0x6E, 0xBD, 0x9E, 0x48, 0xBD, 0xEE, 0xE4, 0x97, 0xC5 },
+	{ 0x1E, 0x75, 0xCB, 0x21, 0xC6, 0x09, 0x89, 0x02, 0x03, 0x75, 0xF1, 0xA7, 0xA2, 0x42, 0x83, 0x9F, 0x0B, 0x0B, 0x68, 0x97, 0x3A, 0x4C, 0x2A, 0x05, 0xCF, 0x75, 0x55, 0xED, 0x5A, 0xAE, 0xC4, 0xC1 },
+	{ 0x62, 0xBF, 0x8A, 0x9C, 0x32, 0xA5, 0xBC, 0xCF, 0x29, 0x0B, 0x6C, 0x47, 0x4D, 0x75, 0xB2, 0xA2, 0xA4, 0x09, 0x3F, 0x1A, 0x9E, 0x27, 0x13, 0x94, 0x33, 0xA8, 0xF2, 0xB3, 0xBC, 0xE7, 0xB8, 0xD7 },
+	{ 0x16, 0x6C, 0x83, 0x50, 0xD3, 0x17, 0x3B, 0x5E, 0x70, 0x2B, 0x78, 0x3D, 0xFD, 0x33, 0xC6, 0x6E, 0xE0, 0x43, 0x27, 0x42, 0xE9, 0xB9, 0x2B, 0x99, 0x7F, 0xD2, 0x3C, 0x60, 0xDC, 0x67, 0x56, 0xCA },
+	{ 0x04, 0x4A, 0x14, 0xD8, 0x22, 0xA9, 0x0C, 0xAC, 0xF2, 0xF5, 0xA1, 0x01, 0x42, 0x8A, 0xDC, 0x8F, 0x41, 0x09, 0x38, 0x6C, 0xCB, 0x15, 0x8B, 0xF9, 0x05, 0xC8, 0x61, 0x8B, 0x8E, 0xE2, 0x4E, 0xC3 },
+	{ 0x38, 0x7D, 0x39, 0x7E, 0xA4, 0x3A, 0x99, 0x4B, 0xE8, 0x4D, 0x2D, 0x54, 0x4A, 0xFB, 0xE4, 0x81, 0xA2, 0x00, 0x0F, 0x55, 0x25, 0x26, 0x96, 0xBB, 0xA2, 0xC5, 0x0C, 0x8E, 0xBD, 0x10, 0x13, 0x47 },
+	{ 0x56, 0xF8, 0xCC, 0xF1, 0xF8, 0x64, 0x09, 0xB4, 0x6C, 0xE3, 0x61, 0x66, 0xAE, 0x91, 0x65, 0x13, 0x84, 0x41, 0x57, 0x75, 0x89, 0xDB, 0x08, 0xCB, 0xC5, 0xF6, 0x6C, 0xA2, 0x97, 0x43, 0xB9, 0xFD },
+	{ 0x97, 0x06, 0xC0, 0x92, 0xB0, 0x4D, 0x91, 0xF5, 0x3D, 0xFF, 0x91, 0xFA, 0x37, 0xB7, 0x49, 0x3D, 0x28, 0xB5, 0x76, 0xB5, 0xD7, 0x10, 0x46, 0x9D, 0xF7, 0x94, 0x01, 0x66, 0x22, 0x36, 0xFC, 0x03 },
+	{ 0x87, 0x79, 0x68, 0x68, 0x6C, 0x06, 0x8C, 0xE2, 0xF7, 0xE2, 0xAD, 0xCF, 0xF6, 0x8B, 0xF8, 0x74, 0x8E, 0xDF, 0x3C, 0xF8, 0x62, 0xCF, 0xB4, 0xD3, 0x94, 0x7A, 0x31, 0x06, 0x95, 0x80, 0x54, 0xE3 },
+	{ 0x88, 0x17, 0xE5, 0x71, 0x98, 0x79, 0xAC, 0xF7, 0x02, 0x47, 0x87, 0xEC, 0xCD, 0xB2, 0x71, 0x03, 0x55, 0x66, 0xCF, 0xA3, 0x33, 0xE0, 0x49, 0x40, 0x7C, 0x01, 0x78, 0xCC, 0xC5, 0x7A, 0x5B, 0x9F },
+	{ 0x89, 0x38, 0x24, 0x9E, 0x4B, 0x50, 0xCA, 0xDA, 0xCC, 0xDF, 0x5B, 0x18, 0x62, 0x13, 0x26, 0xCB, 0xB1, 0x52, 0x53, 0xE3, 0x3A, 0x20, 0xF5, 0x63, 0x6E, 0x99, 0x5D, 0x72, 0x47, 0x8D, 0xE4, 0x72 },
+	{ 0xF1, 0x64, 0xAB, 0xBA, 0x49, 0x63, 0xA4, 0x4D, 0x10, 0x72, 0x57, 0xE3, 0x23, 0x2D, 0x90, 0xAC, 0xA5, 0xE6, 0x6A, 0x14, 0x08, 0x24, 0x8C, 0x51, 0x74, 0x1E, 0x99, 0x1D, 0xB5, 0x22, 0x77, 0x56 },
+	{ 0xD0, 0x55, 0x63, 0xE2, 0xB1, 0xCB, 0xA0, 0xC4, 0xA2, 0xA1, 0xE8, 0xBD, 0xE3, 0xA1, 0xA0, 0xD9, 0xF5, 0xB4, 0x0C, 0x85, 0xA0, 0x70, 0xD6, 0xF5, 0xFB, 0x21, 0x06, 0x6E, 0xAD, 0x5D, 0x06, 0x01 },
+	{ 0x03, 0xFB, 0xB1, 0x63, 0x84, 0xF0, 0xA3, 0x86, 0x6F, 0x4C, 0x31, 0x17, 0x87, 0x76, 0x66, 0xEF, 0xBF, 0x12, 0x45, 0x97, 0x56, 0x4B, 0x29, 0x3D, 0x4A, 0xAB, 0x0D, 0x26, 0x9F, 0xAB, 0xDD, 0xFA },
+	{ 0x5F, 0xA8, 0x48, 0x6A, 0xC0, 0xE5, 0x29, 0x64, 0xD1, 0x88, 0x1B, 0xBE, 0x33, 0x8E, 0xB5, 0x4B, 0xE2, 0xF7, 0x19, 0x54, 0x92, 0x24, 0x89, 0x20, 0x57, 0xB4, 0xDA, 0x04, 0xBA, 0x8B, 0x34, 0x75 },
+	{ 0xCD, 0xFA, 0xBC, 0xEE, 0x46, 0x91, 0x11, 0x11, 0x23, 0x6A, 0x31, 0x70, 0x8B, 0x25, 0x39, 0xD7, 0x1F, 0xC2, 0x11, 0xD9, 0xB0, 0x9C, 0x0D, 0x85, 0x30, 0xA1, 0x1E, 0x1D, 0xBF, 0x6E, 0xED, 0x01 },
+	{ 0x4F, 0x82, 0xDE, 0x03, 0xB9, 0x50, 0x47, 0x93, 0xB8, 0x2A, 0x07, 0xA0, 0xBD, 0xCD, 0xFF, 0x31, 0x4D, 0x75, 0x9E, 0x7B, 0x62, 0xD2, 0x6B, 0x78, 0x49, 0x46, 0xB0, 0xD3, 0x6F, 0x91, 0x6F, 0x52 },
+	{ 0x25, 0x9E, 0xC7, 0xF1, 0x73, 0xBC, 0xC7, 0x6A, 0x09, 0x94, 0xC9, 0x67, 0xB4, 0xF5, 0xF0, 0x24, 0xC5, 0x60, 0x57, 0xFB, 0x79, 0xC9, 0x65, 0xC4, 0xFA, 0xE4, 0x18, 0x75, 0xF0, 0x6A, 0x0E, 0x4C },
+	{ 0x19, 0x3C, 0xC8, 0xE7, 0xC3, 0xE0, 0x8B, 0xB3, 0x0F, 0x54, 0x37, 0xAA, 0x27, 0xAD, 0xE1, 0xF1, 0x42, 0x36, 0x9B, 0x24, 0x6A, 0x67, 0x5B, 0x23, 0x83, 0xE6, 0xDA, 0x9B, 0x49, 0xA9, 0x80, 0x9E },
+	{ 0x5C, 0x10, 0x89, 0x6F, 0x0E, 0x28, 0x56, 0xB2, 0xA2, 0xEE, 0xE0, 0xFE, 0x4A, 0x2C, 0x16, 0x33, 0x56, 0x5D, 0x18, 0xF0, 0xE9, 0x3E, 0x1F, 0xAB, 0x26, 0xC3, 0x73, 0xE8, 0xF8, 0x29, 0x65, 0x4D },
+	{ 0xF1, 0x60, 0x12, 0xD9, 0x3F, 0x28, 0x85, 0x1A, 0x1E, 0xB9, 0x89, 0xF5, 0xD0, 0xB4, 0x3F, 0x3F, 0x39, 0xCA, 0x73, 0xC9, 0xA6, 0x2D, 0x51, 0x81, 0xBF, 0xF2, 0x37, 0x53, 0x6B, 0xD3, 0x48, 0xC3 },
+	{ 0x29, 0x66, 0xB3, 0xCF, 0xAE, 0x1E, 0x44, 0xEA, 0x99, 0x6D, 0xC5, 0xD6, 0x86, 0xCF, 0x25, 0xFA, 0x05, 0x3F, 0xB6, 0xF6, 0x72, 0x01, 0xB9, 0xE4, 0x6E, 0xAD, 0xE8, 0x5D, 0x0A, 0xD6, 0xB8, 0x06 },
+	{ 0xDD, 0xB8, 0x78, 0x24, 0x85, 0xE9, 0x00, 0xBC, 0x60, 0xBC, 0xF4, 0xC3, 0x3A, 0x6F, 0xD5, 0x85, 0x68, 0x0C, 0xC6, 0x83, 0xD5, 0x16, 0xEF, 0xA0, 0x3E, 0xB9, 0x98, 0x5F, 0xAD, 0x87, 0x15, 0xFB },
+	{ 0x4C, 0x4D, 0x6E, 0x71, 0xAE, 0xA0, 0x57, 0x86, 0x41, 0x31, 0x48, 0xFC, 0x7A, 0x78, 0x6B, 0x0E, 0xCA, 0xF5, 0x82, 0xCF, 0xF1, 0x20, 0x9F, 0x5A, 0x80, 0x9F, 0xBA, 0x85, 0x04, 0xCE, 0x66, 0x2C },
+	{ 0xFB, 0x4C, 0x5E, 0x86, 0xD7, 0xB2, 0x22, 0x9B, 0x99, 0xB8, 0xBA, 0x6D, 0x94, 0xC2, 0x47, 0xEF, 0x96, 0x4A, 0xA3, 0xA2, 0xBA, 0xE8, 0xED, 0xC7, 0x75, 0x69, 0xF2, 0x8D, 0xBB, 0xFF, 0x2D, 0x4E },
+	{ 0xE9, 0x4F, 0x52, 0x6D, 0xE9, 0x01, 0x96, 0x33, 0xEC, 0xD5, 0x4A, 0xC6, 0x12, 0x0F, 0x23, 0x95, 0x8D, 0x77, 0x18, 0xF1, 0xE7, 0x71, 0x7B, 0xF3, 0x29, 0x21, 0x1A, 0x4F, 0xAE, 0xED, 0x4E, 0x6D },
+	{ 0xCB, 0xD6, 0x66, 0x0A, 0x10, 0xDB, 0x3F, 0x23, 0xF7, 0xA0, 0x3D, 0x4B, 0x9D, 0x40, 0x44, 0xC7, 0x93, 0x2B, 0x28, 0x01, 0xAC, 0x89, 0xD6, 0x0B, 0xC9, 0xEB, 0x92, 0xD6, 0x5A, 0x46, 0xC2, 0xA0 },
+	{ 0x88, 0x18, 0xBB, 0xD3, 0xDB, 0x4D, 0xC1, 0x23, 0xB2, 0x5C, 0xBB, 0xA5, 0xF5, 0x4C, 0x2B, 0xC4, 0xB3, 0xFC, 0xF9, 0xBF, 0x7D, 0x7A, 0x77, 0x09, 0xF4, 0xAE, 0x58, 0x8B, 0x26, 0x7C, 0x4E, 0xCE },
+	{ 0xC6, 0x53, 0x82, 0x51, 0x3F, 0x07, 0x46, 0x0D, 0xA3, 0x98, 0x33, 0xCB, 0x66, 0x6C, 0x5E, 0xD8, 0x2E, 0x61, 0xB9, 0xE9, 0x98, 0xF4, 0xB0, 0xC4, 0x28, 0x7C, 0xEE, 0x56, 0xC3, 0xCC, 0x9B, 0xCD },
+	{ 0x89, 0x75, 0xB0, 0x57, 0x7F, 0xD3, 0x55, 0x66, 0xD7, 0x50, 0xB3, 0x62, 0xB0, 0x89, 0x7A, 0x26, 0xC3, 0x99, 0x13, 0x6D, 0xF0, 0x7B, 0xAB, 0xAB, 0xBD, 0xE6, 0x20, 0x3F, 0xF2, 0x95, 0x4E, 0xD4 },
+	{ 0x21, 0xFE, 0x0C, 0xEB, 0x00, 0x52, 0xBE, 0x7F, 0xB0, 0xF0, 0x04, 0x18, 0x7C, 0xAC, 0xD7, 0xDE, 0x67, 0xFA, 0x6E, 0xB0, 0x93, 0x8D, 0x92, 0x76, 0x77, 0xF2, 0x39, 0x8C, 0x13, 0x23, 0x17, 0xA8 },
+	{ 0x2E, 0xF7, 0x3F, 0x3C, 0x26, 0xF1, 0x2D, 0x93, 0x88, 0x9F, 0x3C, 0x78, 0xB6, 0xA6, 0x6C, 0x1D, 0x52, 0xB6, 0x49, 0xDC, 0x9E, 0x85, 0x6E, 0x2C, 0x17, 0x2E, 0xA7, 0xC5, 0x8A, 0xC2, 0xB5, 0xE3 },
+	{ 0x38, 0x8A, 0x3C, 0xD5, 0x6D, 0x73, 0x86, 0x7A, 0xBB, 0x5F, 0x84, 0x01, 0x49, 0x2B, 0x6E, 0x26, 0x81, 0xEB, 0x69, 0x85, 0x1E, 0x76, 0x7F, 0xD8, 0x42, 0x10, 0xA5, 0x60, 0x76, 0xFB, 0x3D, 0xD3 },
+	{ 0xAF, 0x53, 0x3E, 0x02, 0x2F, 0xC9, 0x43, 0x9E, 0x4E, 0x3C, 0xB8, 0x38, 0xEC, 0xD1, 0x86, 0x92, 0x23, 0x2A, 0xDF, 0x6F, 0xE9, 0x83, 0x95, 0x26, 0xD3, 0xC3, 0xDD, 0x1B, 0x71, 0x91, 0x0B, 0x1A },
+	{ 0x75, 0x1C, 0x09, 0xD4, 0x1A, 0x93, 0x43, 0x88, 0x2A, 0x81, 0xCD, 0x13, 0xEE, 0x40, 0x81, 0x8D, 0x12, 0xEB, 0x44, 0xC6, 0xC7, 0xF4, 0x0D, 0xF1, 0x6E, 0x4A, 0xEA, 0x8F, 0xAB, 0x91, 0x97, 0x2A },
+	{ 0x5B, 0x73, 0xDD, 0xB6, 0x8D, 0x9D, 0x2B, 0x0A, 0xA2, 0x65, 0xA0, 0x79, 0x88, 0xD6, 0xB8, 0x8A, 0xE9, 0xAA, 0xC5, 0x82, 0xAF, 0x83, 0x03, 0x2F, 0x8A, 0x9B, 0x21, 0xA2, 0xE1, 0xB7, 0xBF, 0x18 },
+	{ 0x3D, 0xA2, 0x91, 0x26, 0xC7, 0xC5, 0xD7, 0xF4, 0x3E, 0x64, 0x24, 0x2A, 0x79, 0xFE, 0xAA, 0x4E, 0xF3, 0x45, 0x9C, 0xDE, 0xCC, 0xC8, 0x98, 0xED, 0x59, 0xA9, 0x7F, 0x6E, 0xC9, 0x3B, 0x9D, 0xAB },
+	{ 0x56, 0x6D, 0xC9, 0x20, 0x29, 0x3D, 0xA5, 0xCB, 0x4F, 0xE0, 0xAA, 0x8A, 0xBD, 0xA8, 0xBB, 0xF5, 0x6F, 0x55, 0x23, 0x13, 0xBF, 0xF1, 0x90, 0x46, 0x64, 0x1E, 0x36, 0x15, 0xC1, 0xE3, 0xED, 0x3F },
+	{ 0x41, 0x15, 0xBE, 0xA0, 0x2F, 0x73, 0xF9, 0x7F, 0x62, 0x9E, 0x5C, 0x55, 0x90, 0x72, 0x0C, 0x01, 0xE7, 0xE4, 0x49, 0xAE, 0x2A, 0x66, 0x97, 0xD4, 0xD2, 0x78, 0x33, 0x21, 0x30, 0x36, 0x92, 0xF9 },
+	{ 0x4C, 0xE0, 0x8F, 0x47, 0x62, 0x46, 0x8A, 0x76, 0x70, 0x01, 0x21, 0x64, 0x87, 0x8D, 0x68, 0x34, 0x0C, 0x52, 0xA3, 0x5E, 0x66, 0xC1, 0x88, 0x4D, 0x5C, 0x86, 0x48, 0x89, 0xAB, 0xC9, 0x66, 0x77 },
+	{ 0x81, 0xEA, 0x0B, 0x78, 0x04, 0x12, 0x4E, 0x0C, 0x22, 0xEA, 0x5F, 0xC7, 0x11, 0x04, 0xA2, 0xAF, 0xCB, 0x52, 0xA1, 0xFA, 0x81, 0x6F, 0x3E, 0xCB, 0x7D, 0xCB, 0x5D, 0x9D, 0xEA, 0x17, 0x86, 0xD0 },
+	{ 0xFE, 0x36, 0x27, 0x33, 0xB0, 0x5F, 0x6B, 0xED, 0xAF, 0x93, 0x79, 0xD7, 0xF7, 0x93, 0x6E, 0xDE, 0x20, 0x9B, 0x1F, 0x83, 0x23, 0xC3, 0x92, 0x25, 0x49, 0xD9, 0xE7, 0x36, 0x81, 0xB5, 0xDB, 0x7B },
+	{ 0xEF, 0xF3, 0x7D, 0x30, 0xDF, 0xD2, 0x03, 0x59, 0xBE, 0x4E, 0x73, 0xFD, 0xF4, 0x0D, 0x27, 0x73, 0x4B, 0x3D, 0xF9, 0x0A, 0x97, 0xA5, 0x5E, 0xD7, 0x45, 0x29, 0x72, 0x94, 0xCA, 0x85, 0xD0, 0x9F },
+	{ 0x17, 0x2F, 0xFC, 0x67, 0x15, 0x3D, 0x12, 0xE0, 0xCA, 0x76, 0xA8, 0xB6, 0xCD, 0x5D, 0x47, 0x31, 0x88, 0x5B, 0x39, 0xCE, 0x0C, 0xAC, 0x93, 0xA8, 0x97, 0x2A, 0x18, 0x00, 0x6C, 0x8B, 0x8B, 0xAF },
+	{ 0xC4, 0x79, 0x57, 0xF1, 0xCC, 0x88, 0xE8, 0x3E, 0xF9, 0x44, 0x58, 0x39, 0x70, 0x9A, 0x48, 0x0A, 0x03, 0x6B, 0xED, 0x5F, 0x88, 0xAC, 0x0F, 0xCC, 0x8E, 0x1E, 0x70, 0x3F, 0xFA, 0xAC, 0x13, 0x2C },
+	{ 0x30, 0xF3, 0x54, 0x83, 0x70, 0xCF, 0xDC, 0xED, 0xA5, 0xC3, 0x7B, 0x56, 0x9B, 0x61, 0x75, 0xE7, 0x99, 0xEE, 0xF1, 0xA6, 0x2A, 0xAA, 0x94, 0x32, 0x45, 0xAE, 0x76, 0x69, 0xC2, 0x27, 0xA7, 0xB5 },
+	{ 0xC9, 0x5D, 0xCB, 0x3C, 0xF1, 0xF2, 0x7D, 0x0E, 0xEF, 0x2F, 0x25, 0xD2, 0x41, 0x38, 0x70, 0x90, 0x4A, 0x87, 0x7C, 0x4A, 0x56, 0xC2, 0xDE, 0x1E, 0x83, 0xE2, 0xBC, 0x2A, 0xE2, 0xE4, 0x68, 0x21 },
+	{ 0xD5, 0xD0, 0xB5, 0xD7, 0x05, 0x43, 0x4C, 0xD4, 0x6B, 0x18, 0x57, 0x49, 0xF6, 0x6B, 0xFB, 0x58, 0x36, 0xDC, 0xDF, 0x6E, 0xE5, 0x49, 0xA2, 0xB7, 0xA4, 0xAE, 0xE7, 0xF5, 0x80, 0x07, 0xCA, 0xAF },
+	{ 0xBB, 0xC1, 0x24, 0xA7, 0x12, 0xF1, 0x5D, 0x07, 0xC3, 0x00, 0xE0, 0x5B, 0x66, 0x83, 0x89, 0xA4, 0x39, 0xC9, 0x17, 0x77, 0xF7, 0x21, 0xF8, 0x32, 0x0C, 0x1C, 0x90, 0x78, 0x06, 0x6D, 0x2C, 0x7E },
+	{ 0xA4, 0x51, 0xB4, 0x8C, 0x35, 0xA6, 0xC7, 0x85, 0x4C, 0xFA, 0xAE, 0x60, 0x26, 0x2E, 0x76, 0x99, 0x08, 0x16, 0x38, 0x2A, 0xC0, 0x66, 0x7E, 0x5A, 0x5C, 0x9E, 0x1B, 0x46, 0xC4, 0x34, 0x2D, 0xDF },
+	{ 0xB0, 0xD1, 0x50, 0xFB, 0x55, 0xE7, 0x78, 0xD0, 0x11, 0x47, 0xF0, 0xB5, 0xD8, 0x9D, 0x99, 0xEC, 0xB2, 0x0F, 0xF0, 0x7E, 0x5E, 0x67, 0x60, 0xD6, 0xB6, 0x45, 0xEB, 0x5B, 0x65, 0x4C, 0x62, 0x2B },
+	{ 0x34, 0xF7, 0x37, 0xC0, 0xAB, 0x21, 0x99, 0x51, 0xEE, 0xE8, 0x9A, 0x9F, 0x8D, 0xAC, 0x29, 0x9C, 0x9D, 0x4C, 0x38, 0xF3, 0x3F, 0xA4, 0x94, 0xC5, 0xC6, 0xEE, 0xFC, 0x92, 0xB6, 0xDB, 0x08, 0xBC },
+	{ 0x1A, 0x62, 0xCC, 0x3A, 0x00, 0x80, 0x0D, 0xCB, 0xD9, 0x98, 0x91, 0x08, 0x0C, 0x1E, 0x09, 0x84, 0x58, 0x19, 0x3A, 0x8C, 0xC9, 0xF9, 0x70, 0xEA, 0x99, 0xFB, 0xEF, 0xF0, 0x03, 0x18, 0xC2, 0x89 },
+	{ 0xCF, 0xCE, 0x55, 0xEB, 0xAF, 0xC8, 0x40, 0xD7, 0xAE, 0x48, 0x28, 0x1C, 0x7F, 0xD5, 0x7E, 0xC8, 0xB4, 0x82, 0xD4, 0xB7, 0x04, 0x43, 0x74, 0x95, 0x49, 0x5A, 0xC4, 0x14, 0xCF, 0x4A, 0x37, 0x4B },
+	{ 0x67, 0x46, 0xFA, 0xCF, 0x71, 0x14, 0x6D, 0x99, 0x9D, 0xAB, 0xD0, 0x5D, 0x09, 0x3A, 0xE5, 0x86, 0x64, 0x8D, 0x1E, 0xE2, 0x8E, 0x72, 0x61, 0x7B, 0x99, 0xD0, 0xF0, 0x08, 0x6E, 0x1E, 0x45, 0xBF },
+	{ 0x57, 0x1C, 0xED, 0x28, 0x3B, 0x3F, 0x23, 0xB4, 0xE7, 0x50, 0xBF, 0x12, 0xA2, 0xCA, 0xF1, 0x78, 0x18, 0x47, 0xBD, 0x89, 0x0E, 0x43, 0x60, 0x3C, 0xDC, 0x59, 0x76, 0x10, 0x2B, 0x7B, 0xB1, 0x1B },
+	{ 0xCF, 0xCB, 0x76, 0x5B, 0x04, 0x8E, 0x35, 0x02, 0x2C, 0x5D, 0x08, 0x9D, 0x26, 0xE8, 0x5A, 0x36, 0xB0, 0x05, 0xA2, 0xB8, 0x04, 0x93, 0xD0, 0x3A, 0x14, 0x4E, 0x09, 0xF4, 0x09, 0xB6, 0xAF, 0xD1 },
+	{ 0x40, 0x50, 0xC7, 0xA2, 0x77, 0x05, 0xBB, 0x27, 0xF4, 0x20, 0x89, 0xB2, 0x99, 0xF3, 0xCB, 0xE5, 0x05, 0x4E, 0xAD, 0x68, 0x72, 0x7E, 0x8E, 0xF9, 0x31, 0x8C, 0xE6, 0xF2, 0x5C, 0xD6, 0xF3, 0x1D },
+	{ 0x18, 0x40, 0x70, 0xBD, 0x5D, 0x26, 0x5F, 0xBD, 0xC1, 0x42, 0xCD, 0x1C, 0x5C, 0xD0, 0xD7, 0xE4, 0x14, 0xE7, 0x03, 0x69, 0xA2, 0x66, 0xD6, 0x27, 0xC8, 0xFB, 0xA8, 0x4F, 0xA5, 0xE8, 0x4C, 0x34 },
+	{ 0x9E, 0xDD, 0xA9, 0xA4, 0x44, 0x39, 0x02, 0xA9, 0x58, 0x8C, 0x0D, 0x0C, 0xCC, 0x62, 0xB9, 0x30, 0x21, 0x84, 0x79, 0xA6, 0x84, 0x1E, 0x6F, 0xE7, 0xD4, 0x30, 0x03, 0xF0, 0x4B, 0x1F, 0xD6, 0x43 },
+	{ 0xE4, 0x12, 0xFE, 0xEF, 0x79, 0x08, 0x32, 0x4A, 0x6D, 0xA1, 0x84, 0x16, 0x29, 0xF3, 0x5D, 0x3D, 0x35, 0x86, 0x42, 0x01, 0x93, 0x10, 0xEC, 0x57, 0xC6, 0x14, 0x83, 0x6B, 0x63, 0xD3, 0x07, 0x63 },
+	{ 0x1A, 0x2B, 0x8E, 0xDF, 0xF3, 0xF9, 0xAC, 0xC1, 0x55, 0x4F, 0xCB, 0xAE, 0x3C, 0xF1, 0xD6, 0x29, 0x8C, 0x64, 0x62, 0xE2, 0x2E, 0x5E, 0xB0, 0x25, 0x96, 0x84, 0xF8, 0x35, 0x01, 0x2B, 0xD1, 0x3F },
+	{ 0x28, 0x8C, 0x4A, 0xD9, 0xB9, 0x40, 0x97, 0x62, 0xEA, 0x07, 0xC2, 0x4A, 0x41, 0xF0, 0x4F, 0x69, 0xA7, 0xD7, 0x4B, 0xEE, 0x2D, 0x95, 0x43, 0x53, 0x74, 0xBD, 0xE9, 0x46, 0xD7, 0x24, 0x1C, 0x7B },
+	{ 0x80, 0x56, 0x91, 0xBB, 0x28, 0x67, 0x48, 0xCF, 0xB5, 0x91, 0xD3, 0xAE, 0xBE, 0x7E, 0x6F, 0x4E, 0x4D, 0xC6, 0xE2, 0x80, 0x8C, 0x65, 0x14, 0x3C, 0xC0, 0x04, 0xE4, 0xEB, 0x6F, 0xD0, 0x9D, 0x43 },
+	{ 0xD4, 0xAC, 0x8D, 0x3A, 0x0A, 0xFC, 0x6C, 0xFA, 0x7B, 0x46, 0x0A, 0xE3, 0x00, 0x1B, 0xAE, 0xB3, 0x6D, 0xAD, 0xB3, 0x7D, 0xA0, 0x7D, 0x2E, 0x8A, 0xC9, 0x18, 0x22, 0xDF, 0x34, 0x8A, 0xED, 0x3D },
+	{ 0xC3, 0x76, 0x61, 0x70, 0x14, 0xD2, 0x01, 0x58, 0xBC, 0xED, 0x3D, 0x3B, 0xA5, 0x52, 0xB6, 0xEC, 0xCF, 0x84, 0xE6, 0x2A, 0xA3, 0xEB, 0x65, 0x0E, 0x90, 0x02, 0x9C, 0x84, 0xD1, 0x3E, 0xEA, 0x69 },
+	{ 0xC4, 0x1F, 0x09, 0xF4, 0x3C, 0xEC, 0xAE, 0x72, 0x93, 0xD6, 0x00, 0x7C, 0xA0, 0xA3, 0x57, 0x08, 0x7D, 0x5A, 0xE5, 0x9B, 0xE5, 0x00, 0xC1, 0xCD, 0x5B, 0x28, 0x9E, 0xE8, 0x10, 0xC7, 0xB0, 0x82 },
+	{ 0x03, 0xD1, 0xCE, 0xD1, 0xFB, 0xA5, 0xC3, 0x91, 0x55, 0xC4, 0x4B, 0x77, 0x65, 0xCB, 0x76, 0x0C, 0x78, 0x70, 0x8D, 0xCF, 0xC8, 0x0B, 0x0B, 0xD8, 0xAD, 0xE3, 0xA5, 0x6D, 0xA8, 0x83, 0x0B, 0x29 },
+	{ 0x09, 0xBD, 0xE6, 0xF1, 0x52, 0x21, 0x8D, 0xC9, 0x2C, 0x41, 0xD7, 0xF4, 0x53, 0x87, 0xE6, 0x3E, 0x58, 0x69, 0xD8, 0x07, 0xEC, 0x70, 0xB8, 0x21, 0x40, 0x5D, 0xBD, 0x88, 0x4B, 0x7F, 0xCF, 0x4B },
+	{ 0x71, 0xC9, 0x03, 0x6E, 0x18, 0x17, 0x9B, 0x90, 0xB3, 0x7D, 0x39, 0xE9, 0xF0, 0x5E, 0xB8, 0x9C, 0xC5, 0xFC, 0x34, 0x1F, 0xD7, 0xC4, 0x77, 0xD0, 0xD7, 0x49, 0x32, 0x85, 0xFA, 0xCA, 0x08, 0xA4 },
+	{ 0x59, 0x16, 0x83, 0x3E, 0xBB, 0x05, 0xCD, 0x91, 0x9C, 0xA7, 0xFE, 0x83, 0xB6, 0x92, 0xD3, 0x20, 0x5B, 0xEF, 0x72, 0x39, 0x2B, 0x2C, 0xF6, 0xBB, 0x0A, 0x6D, 0x43, 0xF9, 0x94, 0xF9, 0x5F, 0x11 },
+	{ 0xF6, 0x3A, 0xAB, 0x3E, 0xC6, 0x41, 0xB3, 0xB0, 0x24, 0x96, 0x4C, 0x2B, 0x43, 0x7C, 0x04, 0xF6, 0x04, 0x3C, 0x4C, 0x7E, 0x02, 0x79, 0x23, 0x99, 0x95, 0x40, 0x19, 0x58, 0xF8, 0x6B, 0xBE, 0x54 },
+	{ 0xF1, 0x72, 0xB1, 0x80, 0xBF, 0xB0, 0x97, 0x40, 0x49, 0x31, 0x20, 0xB6, 0x32, 0x6C, 0xBD, 0xC5, 0x61, 0xE4, 0x77, 0xDE, 0xF9, 0xBB, 0xCF, 0xD2, 0x8C, 0xC8, 0xC1, 0xC5, 0xE3, 0x37, 0x9A, 0x31 },
+	{ 0xCB, 0x9B, 0x89, 0xCC, 0x18, 0x38, 0x1D, 0xD9, 0x14, 0x1A, 0xDE, 0x58, 0x86, 0x54, 0xD4, 0xE6, 0xA2, 0x31, 0xD5, 0xBF, 0x49, 0xD4, 0xD5, 0x9A, 0xC2, 0x7D, 0x86, 0x9C, 0xBE, 0x10, 0x0C, 0xF3 },
+	{ 0x7B, 0xD8, 0x81, 0x50, 0x46, 0xFD, 0xD8, 0x10, 0xA9, 0x23, 0xE1, 0x98, 0x4A, 0xAE, 0xBD, 0xCD, 0xF8, 0x4D, 0x87, 0xC8, 0x99, 0x2D, 0x68, 0xB5, 0xEE, 0xB4, 0x60, 0xF9, 0x3E, 0xB3, 0xC8, 0xD7 },
+	{ 0x60, 0x7B, 0xE6, 0x68, 0x62, 0xFD, 0x08, 0xEE, 0x5B, 0x19, 0xFA, 0xCA, 0xC0, 0x9D, 0xFD, 0xBC, 0xD4, 0x0C, 0x31, 0x21, 0x01, 0xD6, 0x6E, 0x6E, 0xBD, 0x2B, 0x84, 0x1F, 0x1B, 0x9A, 0x93, 0x25 },
+	{ 0x9F, 0xE0, 0x3B, 0xBE, 0x69, 0xAB, 0x18, 0x34, 0xF5, 0x21, 0x9B, 0x0D, 0xA8, 0x8A, 0x08, 0xB3, 0x0A, 0x66, 0xC5, 0x91, 0x3F, 0x01, 0x51, 0x96, 0x3C, 0x36, 0x05, 0x60, 0xDB, 0x03, 0x87, 0xB3 },
+	{ 0x90, 0xA8, 0x35, 0x85, 0x71, 0x7B, 0x75, 0xF0, 0xE9, 0xB7, 0x25, 0xE0, 0x55, 0xEE, 0xEE, 0xB9, 0xE7, 0xA0, 0x28, 0xEA, 0x7E, 0x6C, 0xBC, 0x07, 0xB2, 0x09, 0x17, 0xEC, 0x03, 0x63, 0xE3, 0x8C },
+	{ 0x33, 0x6E, 0xA0, 0x53, 0x0F, 0x4A, 0x74, 0x69, 0x12, 0x6E, 0x02, 0x18, 0x58, 0x7E, 0xBB, 0xDE, 0x33, 0x58, 0xA0, 0xB3, 0x1C, 0x29, 0xD2, 0x00, 0xF7, 0xDC, 0x7E, 0xB1, 0x5C, 0x6A, 0xAD, 0xD8 },
+	{ 0xA7, 0x9E, 0x76, 0xDC, 0x0A, 0xBC, 0xA4, 0x39, 0x6F, 0x07, 0x47, 0xCD, 0x7B, 0x74, 0x8D, 0xF9, 0x13, 0x00, 0x76, 0x26, 0xB1, 0xD6, 0x59, 0xDA, 0x0C, 0x1F, 0x78, 0xB9, 0x30, 0x3D, 0x01, 0xA3 },
+	{ 0x44, 0xE7, 0x8A, 0x77, 0x37, 0x56, 0xE0, 0x95, 0x15, 0x19, 0x50, 0x4D, 0x70, 0x38, 0xD2, 0x8D, 0x02, 0x13, 0xA3, 0x7E, 0x0C, 0xE3, 0x75, 0x37, 0x17, 0x57, 0xBC, 0x99, 0x63, 0x11, 0xE3, 0xB8 },
+	{ 0x77, 0xAC, 0x01, 0x2A, 0x3F, 0x75, 0x4D, 0xCF, 0xEA, 0xB5, 0xEB, 0x99, 0x6B, 0xE9, 0xCD, 0x2D, 0x1F, 0x96, 0x11, 0x1B, 0x6E, 0x49, 0xF3, 0x99, 0x4D, 0xF1, 0x81, 0xF2, 0x85, 0x69, 0xD8, 0x25 },
+	{ 0xCE, 0x5A, 0x10, 0xDB, 0x6F, 0xCC, 0xDA, 0xF1, 0x40, 0xAA, 0xA4, 0xDE, 0xD6, 0x25, 0x0A, 0x9C, 0x06, 0xE9, 0x22, 0x2B, 0xC9, 0xF9, 0xF3, 0x65, 0x8A, 0x4A, 0xFF, 0x93, 0x5F, 0x2B, 0x9F, 0x3A },
+	{ 0xEC, 0xC2, 0x03, 0xA7, 0xFE, 0x2B, 0xE4, 0xAB, 0xD5, 0x5B, 0xB5, 0x3E, 0x6E, 0x67, 0x35, 0x72, 0xE0, 0x07, 0x8D, 0xA8, 0xCD, 0x37, 0x5E, 0xF4, 0x30, 0xCC, 0x97, 0xF9, 0xF8, 0x00, 0x83, 0xAF },
+	{ 0x14, 0xA5, 0x18, 0x6D, 0xE9, 0xD7, 0xA1, 0x8B, 0x04, 0x12, 0xB8, 0x56, 0x3E, 0x51, 0xCC, 0x54, 0x33, 0x84, 0x0B, 0x4A, 0x12, 0x9A, 0x8F, 0xF9, 0x63, 0xB3, 0x3A, 0x3C, 0x4A, 0xFE, 0x8E, 0xBB },
+	{ 0x13, 0xF8, 0xEF, 0x95, 0xCB, 0x86, 0xE6, 0xA6, 0x38, 0x93, 0x1C, 0x8E, 0x10, 0x76, 0x73, 0xEB, 0x76, 0xBA, 0x10, 0xD7, 0xC2, 0xCD, 0x70, 0xB9, 0xD9, 0x92, 0x0B, 0xBE, 0xED, 0x92, 0x94, 0x09 },
+	{ 0x0B, 0x33, 0x8F, 0x4E, 0xE1, 0x2F, 0x2D, 0xFC, 0xB7, 0x87, 0x13, 0x37, 0x79, 0x41, 0xE0, 0xB0, 0x63, 0x21, 0x52, 0x58, 0x1D, 0x13, 0x32, 0x51, 0x6E, 0x4A, 0x2C, 0xAB, 0x19, 0x42, 0xCC, 0xA4 },
+	{ 0xEA, 0xAB, 0x0E, 0xC3, 0x7B, 0x3B, 0x8A, 0xB7, 0x96, 0xE9, 0xF5, 0x72, 0x38, 0xDE, 0x14, 0xA2, 0x64, 0xA0, 0x76, 0xF3, 0x88, 0x7D, 0x86, 0xE2, 0x9B, 0xB5, 0x90, 0x6D, 0xB5, 0xA0, 0x0E, 0x02 },
+	{ 0x23, 0xCB, 0x68, 0xB8, 0xC0, 0xE6, 0xDC, 0x26, 0xDC, 0x27, 0x76, 0x6D, 0xDC, 0x0A, 0x13, 0xA9, 0x94, 0x38, 0xFD, 0x55, 0x61, 0x7A, 0xA4, 0x09, 0x5D, 0x8F, 0x96, 0x97, 0x20, 0xC8, 0x72, 0xDF },
+	{ 0x09, 0x1D, 0x8E, 0xE3, 0x0D, 0x6F, 0x29, 0x68, 0xD4, 0x6B, 0x68, 0x7D, 0xD6, 0x52, 0x92, 0x66, 0x57, 0x42, 0xDE, 0x0B, 0xB8, 0x3D, 0xCC, 0x00, 0x04, 0xC7, 0x2C, 0xE1, 0x00, 0x07, 0xA5, 0x49 },
+	{ 0x7F, 0x50, 0x7A, 0xBC, 0x6D, 0x19, 0xBA, 0x00, 0xC0, 0x65, 0xA8, 0x76, 0xEC, 0x56, 0x57, 0x86, 0x88, 0x82, 0xD1, 0x8A, 0x22, 0x1B, 0xC4, 0x6C, 0x7A, 0x69, 0x12, 0x54, 0x1F, 0x5B, 0xC7, 0xBA },
+	{ 0xA0, 0x60, 0x7C, 0x24, 0xE1, 0x4E, 0x8C, 0x22, 0x3D, 0xB0, 0xD7, 0x0B, 0x4D, 0x30, 0xEE, 0x88, 0x01, 0x4D, 0x60, 0x3F, 0x43, 0x7E, 0x9E, 0x02, 0xAA, 0x7D, 0xAF, 0xA3, 0xCD, 0xFB, 0xAD, 0x94 },
+	{ 0xDD, 0xBF, 0xEA, 0x75, 0xCC, 0x46, 0x78, 0x82, 0xEB, 0x34, 0x83, 0xCE, 0x5E, 0x2E, 0x75, 0x6A, 0x4F, 0x47, 0x01, 0xB7, 0x6B, 0x44, 0x55, 0x19, 0xE8, 0x9F, 0x22, 0xD6, 0x0F, 0xA8, 0x6E, 0x06 },
+	{ 0x0C, 0x31, 0x1F, 0x38, 0xC3, 0x5A, 0x4F, 0xB9, 0x0D, 0x65, 0x1C, 0x28, 0x9D, 0x48, 0x68, 0x56, 0xCD, 0x14, 0x13, 0xDF, 0x9B, 0x06, 0x77, 0xF5, 0x3E, 0xCE, 0x2C, 0xD9, 0xE4, 0x77, 0xC6, 0x0A },
+	{ 0x46, 0xA7, 0x3A, 0x8D, 0xD3, 0xE7, 0x0F, 0x59, 0xD3, 0x94, 0x2C, 0x01, 0xDF, 0x59, 0x9D, 0xEF, 0x78, 0x3C, 0x9D, 0xA8, 0x2F, 0xD8, 0x32, 0x22, 0xCD, 0x66, 0x2B, 0x53, 0xDC, 0xE7, 0xDB, 0xDF },
+	{ 0xAD, 0x03, 0x8F, 0xF9, 0xB1, 0x4D, 0xE8, 0x4A, 0x80, 0x1E, 0x4E, 0x62, 0x1C, 0xE5, 0xDF, 0x02, 0x9D, 0xD9, 0x35, 0x20, 0xD0, 0xC2, 0xFA, 0x38, 0xBF, 0xF1, 0x76, 0xA8, 0xB1, 0xD1, 0x69, 0x8C },
+	{ 0xAB, 0x70, 0xC5, 0xDF, 0xBD, 0x1E, 0xA8, 0x17, 0xFE, 0xD0, 0xCD, 0x06, 0x72, 0x93, 0xAB, 0xF3, 0x19, 0xE5, 0xD7, 0x90, 0x1C, 0x21, 0x41, 0xD5, 0xD9, 0x9B, 0x23, 0xF0, 0x3A, 0x38, 0xE7, 0x48 },
+	{ 0x1F, 0xFF, 0xDA, 0x67, 0x93, 0x2B, 0x73, 0xC8, 0xEC, 0xAF, 0x00, 0x9A, 0x34, 0x91, 0xA0, 0x26, 0x95, 0x3B, 0xAB, 0xFE, 0x1F, 0x66, 0x3B, 0x06, 0x97, 0xC3, 0xC4, 0xAE, 0x8B, 0x2E, 0x7D, 0xCB },
+	{ 0xB0, 0xD2, 0xCC, 0x19, 0x47, 0x2D, 0xD5, 0x7F, 0x2B, 0x17, 0xEF, 0xC0, 0x3C, 0x8D, 0x58, 0xC2, 0x28, 0x3D, 0xBB, 0x19, 0xDA, 0x57, 0x2F, 0x77, 0x55, 0x85, 0x5A, 0xA9, 0x79, 0x43, 0x17, 0xA0 },
+	{ 0xA0, 0xD1, 0x9A, 0x6E, 0xE3, 0x39, 0x79, 0xC3, 0x25, 0x51, 0x0E, 0x27, 0x66, 0x22, 0xDF, 0x41, 0xF7, 0x15, 0x83, 0xD0, 0x75, 0x01, 0xB8, 0x70, 0x71, 0x12, 0x9A, 0x0A, 0xD9, 0x47, 0x32, 0xA5 },
+	{ 0x72, 0x46, 0x42, 0xA7, 0x03, 0x2D, 0x10, 0x62, 0xB8, 0x9E, 0x52, 0xBE, 0xA3, 0x4B, 0x75, 0xDF, 0x7D, 0x8F, 0xE7, 0x72, 0xD9, 0xFE, 0x3C, 0x93, 0xDD, 0xF3, 0xC4, 0x54, 0x5A, 0xB5, 0xA9, 0x9B },
+	{ 0xAD, 0xE5, 0xEA, 0xA7, 0xE6, 0x1F, 0x67, 0x2D, 0x58, 0x7E, 0xA0, 0x3D, 0xAE, 0x7D, 0x7B, 0x55, 0x22, 0x9C, 0x01, 0xD0, 0x6B, 0xC0, 0xA5, 0x70, 0x14, 0x36, 0xCB, 0xD1, 0x83, 0x66, 0xA6, 0x26 },
+	{ 0x01, 0x3B, 0x31, 0xEB, 0xD2, 0x28, 0xFC, 0xDD, 0xA5, 0x1F, 0xAB, 0xB0, 0x3B, 0xB0, 0x2D, 0x60, 0xAC, 0x20, 0xCA, 0x21, 0x5A, 0xAF, 0xA8, 0x3B, 0xDD, 0x85, 0x5E, 0x37, 0x55, 0xA3, 0x5F, 0x0B },
+	{ 0x33, 0x2E, 0xD4, 0x0B, 0xB1, 0x0D, 0xDE, 0x3C, 0x95, 0x4A, 0x75, 0xD7, 0xB8, 0x99, 0x9D, 0x4B, 0x26, 0xA1, 0xC0, 0x63, 0xC1, 0xDC, 0x6E, 0x32, 0xC1, 0xD9, 0x1B, 0xAB, 0x7B, 0xBB, 0x7D, 0x16 },
+	{ 0xC7, 0xA1, 0x97, 0xB3, 0xA0, 0x5B, 0x56, 0x6B, 0xCC, 0x9F, 0xAC, 0xD2, 0x0E, 0x44, 0x1D, 0x6F, 0x6C, 0x28, 0x60, 0xAC, 0x96, 0x51, 0xCD, 0x51, 0xD6, 0xB9, 0xD2, 0xCD, 0xEE, 0xEA, 0x03, 0x90 },
+	{ 0xBD, 0x9C, 0xF6, 0x4E, 0xA8, 0x95, 0x3C, 0x03, 0x71, 0x08, 0xE6, 0xF6, 0x54, 0x91, 0x4F, 0x39, 0x58, 0xB6, 0x8E, 0x29, 0xC1, 0x67, 0x00, 0xDC, 0x18, 0x4D, 0x94, 0xA2, 0x17, 0x08, 0xFF, 0x60 },
+	{ 0x88, 0x35, 0xB0, 0xAC, 0x02, 0x11, 0x51, 0xDF, 0x71, 0x64, 0x74, 0xCE, 0x27, 0xCE, 0x4D, 0x3C, 0x15, 0xF0, 0xB2, 0xDA, 0xB4, 0x80, 0x03, 0xCF, 0x3F, 0x3E, 0xFD, 0x09, 0x45, 0x10, 0x6B, 0x9A },
+	{ 0x3B, 0xFE, 0xFA, 0x33, 0x01, 0xAA, 0x55, 0xC0, 0x80, 0x19, 0x0C, 0xFF, 0xDA, 0x8E, 0xAE, 0x51, 0xD9, 0xAF, 0x48, 0x8B, 0x4C, 0x1F, 0x24, 0xC3, 0xD9, 0xA7, 0x52, 0x42, 0xFD, 0x8E, 0xA0, 0x1D },
+	{ 0x08, 0x28, 0x4D, 0x14, 0x99, 0x3C, 0xD4, 0x7D, 0x53, 0xEB, 0xAE, 0xCF, 0x0D, 0xF0, 0x47, 0x8C, 0xC1, 0x82, 0xC8, 0x9C, 0x00, 0xE1, 0x85, 0x9C, 0x84, 0x85, 0x16, 0x86, 0xDD, 0xF2, 0xC1, 0xB7 },
+	{ 0x1E, 0xD7, 0xEF, 0x9F, 0x04, 0xC2, 0xAC, 0x8D, 0xB6, 0xA8, 0x64, 0xDB, 0x13, 0x10, 0x87, 0xF2, 0x70, 0x65, 0x09, 0x8E, 0x69, 0xC3, 0xFE, 0x78, 0x71, 0x8D, 0x9B, 0x94, 0x7F, 0x4A, 0x39, 0xD0 },
+	{ 0xC1, 0x61, 0xF2, 0xDC, 0xD5, 0x7E, 0x9C, 0x14, 0x39, 0xB3, 0x1A, 0x9D, 0xD4, 0x3D, 0x8F, 0x3D, 0x7D, 0xD8, 0xF0, 0xEB, 0x7C, 0xFA, 0xC6, 0xFB, 0x25, 0xA0, 0xF2, 0x8E, 0x30, 0x6F, 0x06, 0x61 },
+	{ 0xC0, 0x19, 0x69, 0xAD, 0x34, 0xC5, 0x2C, 0xAF, 0x3D, 0xC4, 0xD8, 0x0D, 0x19, 0x73, 0x5C, 0x29, 0x73, 0x1A, 0xC6, 0xE7, 0xA9, 0x20, 0x85, 0xAB, 0x92, 0x50, 0xC4, 0x8D, 0xEA, 0x48, 0xA3, 0xFC },
+	{ 0x17, 0x20, 0xB3, 0x65, 0x56, 0x19, 0xD2, 0xA5, 0x2B, 0x35, 0x21, 0xAE, 0x0E, 0x49, 0xE3, 0x45, 0xCB, 0x33, 0x89, 0xEB, 0xD6, 0x20, 0x8A, 0xCA, 0xF9, 0xF1, 0x3F, 0xDA, 0xCC, 0xA8, 0xBE, 0x49 },
+	{ 0x75, 0x62, 0x88, 0x36, 0x1C, 0x83, 0xE2, 0x4C, 0x61, 0x7C, 0xF9, 0x5C, 0x90, 0x5B, 0x22, 0xD0, 0x17, 0xCD, 0xC8, 0x6F, 0x0B, 0xF1, 0xD6, 0x58, 0xF4, 0x75, 0x6C, 0x73, 0x79, 0x87, 0x3B, 0x7F },
+	{ 0xE7, 0xD0, 0xED, 0xA3, 0x45, 0x26, 0x93, 0xB7, 0x52, 0xAB, 0xCD, 0xA1, 0xB5, 0x5E, 0x27, 0x6F, 0x82, 0x69, 0x8F, 0x5F, 0x16, 0x05, 0x40, 0x3E, 0xFF, 0x83, 0x0B, 0xEA, 0x00, 0x71, 0xA3, 0x94 },
+	{ 0x2C, 0x82, 0xEC, 0xAA, 0x6B, 0x84, 0x80, 0x3E, 0x04, 0x4A, 0xF6, 0x31, 0x18, 0xAF, 0xE5, 0x44, 0x68, 0x7C, 0xB6, 0xE6, 0xC7, 0xDF, 0x49, 0xED, 0x76, 0x2D, 0xFD, 0x7C, 0x86, 0x93, 0xA1, 0xBC },
+	{ 0x61, 0x36, 0xCB, 0xF4, 0xB4, 0x41, 0x05, 0x6F, 0xA1, 0xE2, 0x72, 0x24, 0x98, 0x12, 0x5D, 0x6D, 0xED, 0x45, 0xE1, 0x7B, 0x52, 0x14, 0x39, 0x59, 0xC7, 0xF4, 0xD4, 0xE3, 0x95, 0x21, 0x8A, 0xC2 },
+	{ 0x72, 0x1D, 0x32, 0x45, 0xAA, 0xFE, 0xF2, 0x7F, 0x6A, 0x62, 0x4F, 0x47, 0x95, 0x4B, 0x6C, 0x25, 0x50, 0x79, 0x52, 0x6F, 0xFA, 0x25, 0xE9, 0xFF, 0x77, 0xE5, 0xDC, 0xFF, 0x47, 0x3B, 0x15, 0x97 },
+	{ 0x9D, 0xD2, 0xFB, 0xD8, 0xCE, 0xF1, 0x6C, 0x35, 0x3C, 0x0A, 0xC2, 0x11, 0x91, 0xD5, 0x09, 0xEB, 0x28, 0xDD, 0x9E, 0x3E, 0x0D, 0x8C, 0xEA, 0x5D, 0x26, 0xCA, 0x83, 0x93, 0x93, 0x85, 0x1C, 0x3A },
+	{ 0xB2, 0x39, 0x4C, 0xEA, 0xCD, 0xEB, 0xF2, 0x1B, 0xF9, 0xDF, 0x2C, 0xED, 0x98, 0xE5, 0x8F, 0x1C, 0x3A, 0x4B, 0xBB, 0xFF, 0x66, 0x0D, 0xD9, 0x00, 0xF6, 0x22, 0x02, 0xD6, 0x78, 0x5C, 0xC4, 0x6E },
+	{ 0x57, 0x08, 0x9F, 0x22, 0x27, 0x49, 0xAD, 0x78, 0x71, 0x76, 0x5F, 0x06, 0x2B, 0x11, 0x4F, 0x43, 0xBA, 0x20, 0xEC, 0x56, 0x42, 0x2A, 0x8B, 0x1E, 0x3F, 0x87, 0x19, 0x2C, 0x0E, 0xA7, 0x18, 0xC6 },
+	{ 0xE4, 0x9A, 0x94, 0x59, 0x96, 0x1C, 0xD3, 0x3C, 0xDF, 0x4A, 0xAE, 0x1B, 0x10, 0x78, 0xA5, 0xDE, 0xA7, 0xC0, 0x40, 0xE0, 0xFE, 0xA3, 0x40, 0xC9, 0x3A, 0x72, 0x48, 0x72, 0xFC, 0x4A, 0xF8, 0x06 },
+	{ 0xED, 0xE6, 0x7F, 0x72, 0x0E, 0xFF, 0xD2, 0xCA, 0x9C, 0x88, 0x99, 0x41, 0x52, 0xD0, 0x20, 0x1D, 0xEE, 0x6B, 0x0A, 0x2D, 0x2C, 0x07, 0x7A, 0xCA, 0x6D, 0xAE, 0x29, 0xF7, 0x3F, 0x8B, 0x63, 0x09 },
+	{ 0xE0, 0xF4, 0x34, 0xBF, 0x22, 0xE3, 0x08, 0x80, 0x39, 0xC2, 0x1F, 0x71, 0x9F, 0xFC, 0x67, 0xF0, 0xF2, 0xCB, 0x5E, 0x98, 0xA7, 0xA0, 0x19, 0x4C, 0x76, 0xE9, 0x6B, 0xF4, 0xE8, 0xE1, 0x7E, 0x61 },
+	{ 0x27, 0x7C, 0x04, 0xE2, 0x85, 0x34, 0x84, 0xA4, 0xEB, 0xA9, 0x10, 0xAD, 0x33, 0x6D, 0x01, 0xB4, 0x77, 0xB6, 0x7C, 0xC2, 0x00, 0xC5, 0x9F, 0x3C, 0x8D, 0x77, 0xEE, 0xF8, 0x49, 0x4F, 0x29, 0xCD },
+	{ 0x15, 0x6D, 0x57, 0x47, 0xD0, 0xC9, 0x9C, 0x7F, 0x27, 0x09, 0x7D, 0x7B, 0x7E, 0x00, 0x2B, 0x2E, 0x18, 0x5C, 0xB7, 0x2D, 0x8D, 0xD7, 0xEB, 0x42, 0x4A, 0x03, 0x21, 0x52, 0x81, 0x61, 0x21, 0x9F },
+	{ 0x20, 0xDD, 0xD1, 0xED, 0x9B, 0x1C, 0xA8, 0x03, 0x94, 0x6D, 0x64, 0xA8, 0x3A, 0xE4, 0x65, 0x9D, 0xA6, 0x7F, 0xBA, 0x7A, 0x1A, 0x3E, 0xDD, 0xB1, 0xE1, 0x03, 0xC0, 0xF5, 0xE0, 0x3E, 0x3A, 0x2C },
+	{ 0xF0, 0xAF, 0x60, 0x4D, 0x3D, 0xAB, 0xBF, 0x9A, 0x0F, 0x2A, 0x7D, 0x3D, 0xDA, 0x6B, 0xD3, 0x8B, 0xBA, 0x72, 0xC6, 0xD0, 0x9B, 0xE4, 0x94, 0xFC, 0xEF, 0x71, 0x3F, 0xF1, 0x01, 0x89, 0xB6, 0xE6 },
+	{ 0x98, 0x02, 0xBB, 0x87, 0xDE, 0xF4, 0xCC, 0x10, 0xC4, 0xA5, 0xFD, 0x49, 0xAA, 0x58, 0xDF, 0xE2, 0xF3, 0xFD, 0xDB, 0x46, 0xB4, 0x70, 0x88, 0x14, 0xEA, 0xD8, 0x1D, 0x23, 0xBA, 0x95, 0x13, 0x9B },
+	{ 0x4F, 0x8C, 0xE1, 0xE5, 0x1D, 0x2F, 0xE7, 0xF2, 0x40, 0x43, 0xA9, 0x04, 0xD8, 0x98, 0xEB, 0xFC, 0x91, 0x97, 0x54, 0x18, 0x75, 0x34, 0x13, 0xAA, 0x09, 0x9B, 0x79, 0x5E, 0xCB, 0x35, 0xCE, 0xDB },
+	{ 0xBD, 0xDC, 0x65, 0x14, 0xD7, 0xEE, 0x6A, 0xCE, 0x0A, 0x4A, 0xC1, 0xD0, 0xE0, 0x68, 0x11, 0x22, 0x88, 0xCB, 0xCF, 0x56, 0x04, 0x54, 0x64, 0x27, 0x05, 0x63, 0x01, 0x77, 0xCB, 0xA6, 0x08, 0xBD },
+	{ 0xD6, 0x35, 0x99, 0x4F, 0x62, 0x91, 0x51, 0x7B, 0x02, 0x81, 0xFF, 0xDD, 0x49, 0x6A, 0xFA, 0x86, 0x27, 0x12, 0xE5, 0xB3, 0xC4, 0xE5, 0x2E, 0x4C, 0xD5, 0xFD, 0xAE, 0x8C, 0x0E, 0x72, 0xFB, 0x08 },
+	{ 0x87, 0x8D, 0x9C, 0xA6, 0x00, 0xCF, 0x87, 0xE7, 0x69, 0xCC, 0x30, 0x5C, 0x1B, 0x35, 0x25, 0x51, 0x86, 0x61, 0x5A, 0x73, 0xA0, 0xDA, 0x61, 0x3B, 0x5F, 0x1C, 0x98, 0xDB, 0xF8, 0x12, 0x83, 0xEA },
+	{ 0xA6, 0x4E, 0xBE, 0x5D, 0xC1, 0x85, 0xDE, 0x9F, 0xDD, 0xE7, 0x60, 0x7B, 0x69, 0x98, 0x70, 0x2E, 0xB2, 0x34, 0x56, 0x18, 0x49, 0x57, 0x30, 0x7D, 0x2F, 0xA7, 0x2E, 0x87, 0xA4, 0x77, 0x02, 0xD6 },
+	{ 0xCE, 0x50, 0xEA, 0xB7, 0xB5, 0xEB, 0x52, 0xBD, 0xC9, 0xAD, 0x8E, 0x5A, 0x48, 0x0A, 0xB7, 0x80, 0xCA, 0x93, 0x20, 0xE4, 0x43, 0x60, 0xB1, 0xFE, 0x37, 0xE0, 0x3F, 0x2F, 0x7A, 0xD7, 0xDE, 0x01 },
+	{ 0xEE, 0xDD, 0xB7, 0xC0, 0xDB, 0x6E, 0x30, 0xAB, 0xE6, 0x6D, 0x79, 0xE3, 0x27, 0x51, 0x1E, 0x61, 0xFC, 0xEB, 0xBC, 0x29, 0xF1, 0x59, 0xB4, 0x0A, 0x86, 0xB0, 0x46, 0xEC, 0xF0, 0x51, 0x38, 0x23 },
+	{ 0x78, 0x7F, 0xC9, 0x34, 0x40, 0xC1, 0xEC, 0x96, 0xB5, 0xAD, 0x01, 0xC1, 0x6C, 0xF7, 0x79, 0x16, 0xA1, 0x40, 0x5F, 0x94, 0x26, 0x35, 0x6E, 0xC9, 0x21, 0xD8, 0xDF, 0xF3, 0xEA, 0x63, 0xB7, 0xE0 },
+	{ 0x7F, 0x0D, 0x5E, 0xAB, 0x47, 0xEE, 0xFD, 0xA6, 0x96, 0xC0, 0xBF, 0x0F, 0xBF, 0x86, 0xAB, 0x21, 0x6F, 0xCE, 0x46, 0x1E, 0x93, 0x03, 0xAB, 0xA6, 0xAC, 0x37, 0x41, 0x20, 0xE8, 0x90, 0xE8, 0xDF },
+	{ 0xB6, 0x80, 0x04, 0xB4, 0x2F, 0x14, 0xAD, 0x02, 0x9F, 0x4C, 0x2E, 0x03, 0xB1, 0xD5, 0xEB, 0x76, 0xD5, 0x71, 0x60, 0xE2, 0x64, 0x76, 0xD2, 0x11, 0x31, 0xBE, 0xF2, 0x0A, 0xDA, 0x7D, 0x27, 0xF4 },
+	{ 0xB0, 0xC4, 0xEB, 0x18, 0xAE, 0x25, 0x0B, 0x51, 0xA4, 0x13, 0x82, 0xEA, 0xD9, 0x2D, 0x0D, 0xC7, 0x45, 0x5F, 0x93, 0x79, 0xFC, 0x98, 0x84, 0x42, 0x8E, 0x47, 0x70, 0x60, 0x8D, 0xB0, 0xFA, 0xEC },
+	{ 0xF9, 0x2B, 0x7A, 0x87, 0x0C, 0x05, 0x9F, 0x4D, 0x46, 0x46, 0x4C, 0x82, 0x4E, 0xC9, 0x63, 0x55, 0x14, 0x0B, 0xDC, 0xE6, 0x81, 0x32, 0x2C, 0xC3, 0xA9, 0x92, 0xFF, 0x10, 0x3E, 0x3F, 0xEA, 0x52 },
+	{ 0x53, 0x64, 0x31, 0x26, 0x14, 0x81, 0x33, 0x98, 0xCC, 0x52, 0x5D, 0x4C, 0x4E, 0x14, 0x6E, 0xDE, 0xB3, 0x71, 0x26, 0x5F, 0xBA, 0x19, 0x13, 0x3A, 0x2C, 0x3D, 0x21, 0x59, 0x29, 0x8A, 0x17, 0x42 },
+	{ 0xF6, 0x62, 0x0E, 0x68, 0xD3, 0x7F, 0xB2, 0xAF, 0x50, 0x00, 0xFC, 0x28, 0xE2, 0x3B, 0x83, 0x22, 0x97, 0xEC, 0xD8, 0xBC, 0xE9, 0x9E, 0x8B, 0xE4, 0xD0, 0x4E, 0x85, 0x30, 0x9E, 0x3D, 0x33, 0x74 },
+	{ 0x53, 0x16, 0xA2, 0x79, 0x69, 0xD7, 0xFE, 0x04, 0xFF, 0x27, 0xB2, 0x83, 0x96, 0x1B, 0xFF, 0xC3, 0xBF, 0x5D, 0xFB, 0x32, 0xFB, 0x6A, 0x89, 0xD1, 0x01, 0xC6, 0xC3, 0xB1, 0x93, 0x7C, 0x28, 0x71 },
+	{ 0x81, 0xD1, 0x66, 0x4F, 0xDF, 0x3C, 0xB3, 0x3C, 0x24, 0xEE, 0xBA, 0xC0, 0xBD, 0x64, 0x24, 0x4B, 0x77, 0xC4, 0xAB, 0xEA, 0x90, 0xBB, 0xE8, 0xB5, 0xEE, 0x0B, 0x2A, 0xAF, 0xCF, 0x2D, 0x6A, 0x53 },
+	{ 0x34, 0x57, 0x82, 0xF2, 0x95, 0xB0, 0x88, 0x03, 0x52, 0xE9, 0x24, 0xA0, 0x46, 0x7B, 0x5F, 0xBC, 0x3E, 0x8F, 0x3B, 0xFB, 0xC3, 0xC7, 0xE4, 0x8B, 0x67, 0x09, 0x1F, 0xB5, 0xE8, 0x0A, 0x94, 0x42 },
+	{ 0x79, 0x41, 0x11, 0xEA, 0x6C, 0xD6, 0x5E, 0x31, 0x1F, 0x74, 0xEE, 0x41, 0xD4, 0x76, 0xCB, 0x63, 0x2C, 0xE1, 0xE4, 0xB0, 0x51, 0xDC, 0x1D, 0x9E, 0x9D, 0x06, 0x1A, 0x19, 0xE1, 0xD0, 0xBB, 0x49 },
+	{ 0x2A, 0x85, 0xDA, 0xF6, 0x13, 0x88, 0x16, 0xB9, 0x9B, 0xF8, 0xD0, 0x8B, 0xA2, 0x11, 0x4B, 0x7A, 0xB0, 0x79, 0x75, 0xA7, 0x84, 0x20, 0xC1, 0xA3, 0xB0, 0x6A, 0x77, 0x7C, 0x22, 0xDD, 0x8B, 0xCB },
+	{ 0x89, 0xB0, 0xD5, 0xF2, 0x89, 0xEC, 0x16, 0x40, 0x1A, 0x06, 0x9A, 0x96, 0x0D, 0x0B, 0x09, 0x3E, 0x62, 0x5D, 0xA3, 0xCF, 0x41, 0xEE, 0x29, 0xB5, 0x9B, 0x93, 0x0C, 0x58, 0x20, 0x14, 0x54, 0x55 },
+	{ 0xD0, 0xFD, 0xCB, 0x54, 0x39, 0x43, 0xFC, 0x27, 0xD2, 0x08, 0x64, 0xF5, 0x21, 0x81, 0x47, 0x1B, 0x94, 0x2C, 0xC7, 0x7C, 0xA6, 0x75, 0xBC, 0xB3, 0x0D, 0xF3, 0x1D, 0x35, 0x8E, 0xF7, 0xB1, 0xEB },
+	{ 0xB1, 0x7E, 0xA8, 0xD7, 0x70, 0x63, 0xC7, 0x09, 0xD4, 0xDC, 0x6B, 0x87, 0x94, 0x13, 0xC3, 0x43, 0xE3, 0x79, 0x0E, 0x9E, 0x62, 0xCA, 0x85, 0xB7, 0x90, 0x0B, 0x08, 0x6F, 0x6B, 0x75, 0xC6, 0x72 },
+	{ 0xE7, 0x1A, 0x3E, 0x2C, 0x27, 0x4D, 0xB8, 0x42, 0xD9, 0x21, 0x14, 0xF2, 0x17, 0xE2, 0xC0, 0xEA, 0xC8, 0xB4, 0x50, 0x93, 0xFD, 0xFD, 0x9D, 0xF4, 0xCA, 0x71, 0x62, 0x39, 0x48, 0x62, 0xD5, 0x01 },
+	{ 0xC0, 0x47, 0x67, 0x59, 0xAB, 0x7A, 0xA3, 0x33, 0x23, 0x4F, 0x6B, 0x44, 0xF5, 0xFD, 0x85, 0x83, 0x90, 0xEC, 0x23, 0x69, 0x4C, 0x62, 0x2C, 0xB9, 0x86, 0xE7, 0x69, 0xC7, 0x8E, 0xDD, 0x73, 0x3E },
+	{ 0x9A, 0xB8, 0xEA, 0xBB, 0x14, 0x16, 0x43, 0x4D, 0x85, 0x39, 0x13, 0x41, 0xD5, 0x69, 0x93, 0xC5, 0x54, 0x58, 0x16, 0x7D, 0x44, 0x18, 0xB1, 0x9A, 0x0F, 0x2A, 0xD8, 0xB7, 0x9A, 0x83, 0xA7, 0x5B },
+	{ 0x79, 0x92, 0xD0, 0xBB, 0xB1, 0x5E, 0x23, 0x82, 0x6F, 0x44, 0x3E, 0x00, 0x50, 0x5D, 0x68, 0xD3, 0xED, 0x73, 0x72, 0x99, 0x5A, 0x5C, 0x3E, 0x49, 0x86, 0x54, 0x10, 0x2F, 0xBC, 0xD0, 0x96, 0x4E },
+	{ 0xC0, 0x21, 0xB3, 0x00, 0x85, 0x15, 0x14, 0x35, 0xDF, 0x33, 0xB0, 0x07, 0xCC, 0xEC, 0xC6, 0x9D, 0xF1, 0x26, 0x9F, 0x39, 0xBA, 0x25, 0x09, 0x2B, 0xED, 0x59, 0xD9, 0x32, 0xAC, 0x0F, 0xDC, 0x28 },
+	{ 0x91, 0xA2, 0x5E, 0xC0, 0xEC, 0x0D, 0x9A, 0x56, 0x7F, 0x89, 0xC4, 0xBF, 0xE1, 0xA6, 0x5A, 0x0E, 0x43, 0x2D, 0x07, 0x06, 0x4B, 0x41, 0x90, 0xE2, 0x7D, 0xFB, 0x81, 0x90, 0x1F, 0xD3, 0x13, 0x9B },
+	{ 0x59, 0x50, 0xD3, 0x9A, 0x23, 0xE1, 0x54, 0x5F, 0x30, 0x12, 0x70, 0xAA, 0x1A, 0x12, 0xF2, 0xE6, 0xC4, 0x53, 0x77, 0x6E, 0x4D, 0x63, 0x55, 0xDE, 0x42, 0x5C, 0xC1, 0x53, 0xF9, 0x81, 0x88, 0x67 },
+	{ 0xD7, 0x9F, 0x14, 0x72, 0x0C, 0x61, 0x0A, 0xF1, 0x79, 0xA3, 0x76, 0x5D, 0x4B, 0x7C, 0x09, 0x68, 0xF9, 0x77, 0x96, 0x2D, 0xBF, 0x65, 0x5B, 0x52, 0x12, 0x72, 0xB6, 0xF1, 0xE1, 0x94, 0x48, 0x8E },
+	{ 0xE9, 0x53, 0x1B, 0xFC, 0x8B, 0x02, 0x99, 0x5A, 0xEA, 0xA7, 0x5B, 0xA2, 0x70, 0x31, 0xFA, 0xDB, 0xCB, 0xF4, 0xA0, 0xDA, 0xB8, 0x96, 0x1D, 0x92, 0x96, 0xCD, 0x7E, 0x84, 0xD2, 0x5D, 0x60, 0x06 },
+	{ 0x34, 0xE9, 0xC2, 0x6A, 0x01, 0xD7, 0xF1, 0x61, 0x81, 0xB4, 0x54, 0xA9, 0xD1, 0x62, 0x3C, 0x23, 0x3C, 0xB9, 0x9D, 0x31, 0xC6, 0x94, 0x65, 0x6E, 0x94, 0x13, 0xAC, 0xA3, 0xE9, 0x18, 0x69, 0x2F },
+	{ 0xD9, 0xD7, 0x42, 0x2F, 0x43, 0x7B, 0xD4, 0x39, 0xDD, 0xD4, 0xD8, 0x83, 0xDA, 0xE2, 0xA0, 0x83, 0x50, 0x17, 0x34, 0x14, 0xBE, 0x78, 0x15, 0x51, 0x33, 0xFF, 0xF1, 0x96, 0x4C, 0x3D, 0x79, 0x72 },
+	{ 0x4A, 0xEE, 0x0C, 0x7A, 0xAF, 0x07, 0x54, 0x14, 0xFF, 0x17, 0x93, 0xEA, 0xD7, 0xEA, 0xCA, 0x60, 0x17, 0x75, 0xC6, 0x15, 0xDB, 0xD6, 0x0B, 0x64, 0x0B, 0x0A, 0x9F, 0x0C, 0xE5, 0x05, 0xD4, 0x35 },
+	{ 0x6B, 0xFD, 0xD1, 0x54, 0x59, 0xC8, 0x3B, 0x99, 0xF0, 0x96, 0xBF, 0xB4, 0x9E, 0xE8, 0x7B, 0x06, 0x3D, 0x69, 0xC1, 0x97, 0x4C, 0x69, 0x28, 0xAC, 0xFC, 0xFB, 0x40, 0x99, 0xF8, 0xC4, 0xEF, 0x67 },
+	{ 0x9F, 0xD1, 0xC4, 0x08, 0xFD, 0x75, 0xC3, 0x36, 0x19, 0x3A, 0x2A, 0x14, 0xD9, 0x4F, 0x6A, 0xF5, 0xAD, 0xF0, 0x50, 0xB8, 0x03, 0x87, 0xB4, 0xB0, 0x10, 0xFB, 0x29, 0xF4, 0xCC, 0x72, 0x70, 0x7C },
+	{ 0x13, 0xC8, 0x84, 0x80, 0xA5, 0xD0, 0x0D, 0x6C, 0x8C, 0x7A, 0xD2, 0x11, 0x0D, 0x76, 0xA8, 0x2D, 0x9B, 0x70, 0xF4, 0xFA, 0x66, 0x96, 0xD4, 0xE5, 0xDD, 0x42, 0xA0, 0x66, 0xDC, 0xAF, 0x99, 0x20 },
+	{ 0x82, 0x0E, 0x72, 0x5E, 0xE2, 0x5F, 0xE8, 0xFD, 0x3A, 0x8D, 0x5A, 0xBE, 0x4C, 0x46, 0xC3, 0xBA, 0x88, 0x9D, 0xE6, 0xFA, 0x91, 0x91, 0xAA, 0x22, 0xBA, 0x67, 0xD5, 0x70, 0x54, 0x21, 0x54, 0x2B },
+	{ 0x32, 0xD9, 0x3A, 0x0E, 0xB0, 0x2F, 0x42, 0xFB, 0xBC, 0xAF, 0x2B, 0xAD, 0x00, 0x85, 0xB2, 0x82, 0xE4, 0x60, 0x46, 0xA4, 0xDF, 0x7A, 0xD1, 0x06, 0x57, 0xC9, 0xD6, 0x47, 0x63, 0x75, 0xB9, 0x3E },
+	{ 0xAD, 0xC5, 0x18, 0x79, 0x05, 0xB1, 0x66, 0x9C, 0xD8, 0xEC, 0x9C, 0x72, 0x1E, 0x19, 0x53, 0x78, 0x6B, 0x9D, 0x89, 0xA9, 0xBA, 0xE3, 0x07, 0x80, 0xF1, 0xE1, 0xEA, 0xB2, 0x4A, 0x00, 0x52, 0x3C },
+	{ 0xE9, 0x07, 0x56, 0xFF, 0x7F, 0x9A, 0xD8, 0x10, 0xB2, 0x39, 0xA1, 0x0C, 0xED, 0x2C, 0xF9, 0xB2, 0x28, 0x43, 0x54, 0xC1, 0xF8, 0xC7, 0xE0, 0xAC, 0xCC, 0x24, 0x61, 0xDC, 0x79, 0x6D, 0x6E, 0x89 },
+	{ 0x12, 0x51, 0xF7, 0x6E, 0x56, 0x97, 0x84, 0x81, 0x87, 0x53, 0x59, 0x80, 0x1D, 0xB5, 0x89, 0xA0, 0xB2, 0x2F, 0x86, 0xD8, 0xD6, 0x34, 0xDC, 0x04, 0x50, 0x6F, 0x32, 0x2E, 0xD7, 0x8F, 0x17, 0xE8 },
+	{ 0x3A, 0xFA, 0x89, 0x9F, 0xD9, 0x80, 0xE7, 0x3E, 0xCB, 0x7F, 0x4D, 0x8B, 0x8F, 0x29, 0x1D, 0xC9, 0xAF, 0x79, 0x6B, 0xC6, 0x5D, 0x27, 0xF9, 0x74, 0xC6, 0xF1, 0x93, 0xC9, 0x19, 0x1A, 0x09, 0xFD },
+	{ 0xAA, 0x30, 0x5B, 0xE2, 0x6E, 0x5D, 0xED, 0xDC, 0x3C, 0x10, 0x10, 0xCB, 0xC2, 0x13, 0xF9, 0x5F, 0x05, 0x1C, 0x78, 0x5C, 0x5B, 0x43, 0x1E, 0x6A, 0x7C, 0xD0, 0x48, 0xF1, 0x61, 0x78, 0x75, 0x28 },
+	{ 0x8E, 0xA1, 0x88, 0x4F, 0xF3, 0x2E, 0x9D, 0x10, 0xF0, 0x39, 0xB4, 0x07, 0xD0, 0xD4, 0x4E, 0x7E, 0x67, 0x0A, 0xBD, 0x88, 0x4A, 0xEE, 0xE0, 0xFB, 0x75, 0x7A, 0xE9, 0x4E, 0xAA, 0x97, 0x37, 0x3D },
+	{ 0xD4, 0x82, 0xB2, 0x15, 0x5D, 0x4D, 0xEC, 0x6B, 0x47, 0x36, 0xA1, 0xF1, 0x61, 0x7B, 0x53, 0xAA, 0xA3, 0x73, 0x10, 0x27, 0x7D, 0x3F, 0xEF, 0x0C, 0x37, 0xAD, 0x41, 0x76, 0x8F, 0xC2, 0x35, 0xB4 },
+	{ 0x4D, 0x41, 0x39, 0x71, 0x38, 0x7E, 0x7A, 0x88, 0x98, 0xA8, 0xDC, 0x2A, 0x27, 0x50, 0x07, 0x78, 0x53, 0x9E, 0xA2, 0x14, 0xA2, 0xDF, 0xE9, 0xB3, 0xD7, 0xE8, 0xEB, 0xDC, 0xE5, 0xCF, 0x3D, 0xB3 },
+	{ 0x69, 0x6E, 0x5D, 0x46, 0xE6, 0xC5, 0x7E, 0x87, 0x96, 0xE4, 0x73, 0x5D, 0x08, 0x91, 0x6E, 0x0B, 0x79, 0x29, 0xB3, 0xCF, 0x29, 0x8C, 0x29, 0x6D, 0x22, 0xE9, 0xD3, 0x01, 0x96, 0x53, 0x37, 0x1C },
+	{ 0x1F, 0x56, 0x47, 0xC1, 0xD3, 0xB0, 0x88, 0x22, 0x88, 0x85, 0x86, 0x5C, 0x89, 0x40, 0x90, 0x8B, 0xF4, 0x0D, 0x1A, 0x82, 0x72, 0x82, 0x19, 0x73, 0xB1, 0x60, 0x00, 0x8E, 0x7A, 0x3C, 0xE2, 0xEB },
+	{ 0xB6, 0xE7, 0x6C, 0x33, 0x0F, 0x02, 0x1A, 0x5B, 0xDA, 0x65, 0x87, 0x50, 0x10, 0xB0, 0xED, 0xF0, 0x91, 0x26, 0xC0, 0xF5, 0x10, 0xEA, 0x84, 0x90, 0x48, 0x19, 0x20, 0x03, 0xAE, 0xF4, 0xC6, 0x1C },
+	{ 0x3C, 0xD9, 0x52, 0xA0, 0xBE, 0xAD, 0xA4, 0x1A, 0xBB, 0x42, 0x4C, 0xE4, 0x7F, 0x94, 0xB4, 0x2B, 0xE6, 0x4E, 0x1F, 0xFB, 0x0F, 0xD0, 0x78, 0x22, 0x76, 0x80, 0x79, 0x46, 0xD0, 0xD0, 0xBC, 0x55 },
+	{ 0x98, 0xD9, 0x26, 0x77, 0x43, 0x9B, 0x41, 0xB7, 0xBB, 0x51, 0x33, 0x12, 0xAF, 0xB9, 0x2B, 0xCC, 0x8E, 0xE9, 0x68, 0xB2, 0xE3, 0xB2, 0x38, 0xCE, 0xCB, 0x9B, 0x0F, 0x34, 0xC9, 0xBB, 0x63, 0xD0 },
+	{ 0xEC, 0xBC, 0xA2, 0xCF, 0x08, 0xAE, 0x57, 0xD5, 0x17, 0xAD, 0x16, 0x15, 0x8A, 0x32, 0xBF, 0xA7, 0xDC, 0x03, 0x82, 0xEA, 0xED, 0xA1, 0x28, 0xE9, 0x18, 0x86, 0x73, 0x4C, 0x24, 0xA0, 0xB2, 0x9D },
+	{ 0x94, 0x2C, 0xC7, 0xC0, 0xB5, 0x2E, 0x2B, 0x16, 0xA4, 0xB8, 0x9F, 0xA4, 0xFC, 0x7E, 0x0B, 0xF6, 0x09, 0xE2, 0x9A, 0x08, 0xC1, 0xA8, 0x54, 0x34, 0x52, 0xB7, 0x7C, 0x7B, 0xFD, 0x11, 0xBB, 0x28 },
+	{ 0x8A, 0x06, 0x5D, 0x8B, 0x61, 0xA0, 0xDF, 0xFB, 0x17, 0x0D, 0x56, 0x27, 0x73, 0x5A, 0x76, 0xB0, 0xE9, 0x50, 0x60, 0x37, 0x80, 0x8C, 0xBA, 0x16, 0xC3, 0x45, 0x00, 0x7C, 0x9F, 0x79, 0xCF, 0x8F },
+	{ 0x1B, 0x9F, 0xA1, 0x97, 0x14, 0x65, 0x9C, 0x78, 0xFF, 0x41, 0x38, 0x71, 0x84, 0x92, 0x15, 0x36, 0x10, 0x29, 0xAC, 0x80, 0x2B, 0x1C, 0xBC, 0xD5, 0x4E, 0x40, 0x8B, 0xD8, 0x72, 0x87, 0xF8, 0x1F },
+	{ 0x8D, 0xAB, 0x07, 0x1B, 0xCD, 0x6C, 0x72, 0x92, 0xA9, 0xEF, 0x72, 0x7B, 0x4A, 0xE0, 0xD8, 0x67, 0x13, 0x30, 0x1D, 0xA8, 0x61, 0x8D, 0x9A, 0x48, 0xAD, 0xCE, 0x55, 0xF3, 0x03, 0xA8, 0x69, 0xA1 },
+	{ 0x82, 0x53, 0xE3, 0xE7, 0xC7, 0xB6, 0x84, 0xB9, 0xCB, 0x2B, 0xEB, 0x01, 0x4C, 0xE3, 0x30, 0xFF, 0x3D, 0x99, 0xD1, 0x7A, 0xBB, 0xDB, 0xAB, 0xE4, 0xF4, 0xD6, 0x74, 0xDE, 0xD5, 0x3F, 0xFC, 0x6B },
+	{ 0xF1, 0x95, 0xF3, 0x21, 0xE9, 0xE3, 0xD6, 0xBD, 0x7D, 0x07, 0x45, 0x04, 0xDD, 0x2A, 0xB0, 0xE6, 0x24, 0x1F, 0x92, 0xE7, 0x84, 0xB1, 0xAA, 0x27, 0x1F, 0xF6, 0x48, 0xB1, 0xCA, 0xB6, 0xD7, 0xF6 },
+	{ 0x27, 0xE4, 0xCC, 0x72, 0x09, 0x0F, 0x24, 0x12, 0x66, 0x47, 0x6A, 0x7C, 0x09, 0x49, 0x5F, 0x2D, 0xB1, 0x53, 0xD5, 0xBC, 0xBD, 0x76, 0x19, 0x03, 0xEF, 0x79, 0x27, 0x5E, 0xC5, 0x6B, 0x2E, 0xD8 },
+	{ 0x89, 0x9C, 0x24, 0x05, 0x78, 0x8E, 0x25, 0xB9, 0x9A, 0x18, 0x46, 0x35, 0x5E, 0x64, 0x6D, 0x77, 0xCF, 0x40, 0x00, 0x83, 0x41, 0x5F, 0x7D, 0xC5, 0xAF, 0xE6, 0x9D, 0x6E, 0x17, 0xC0, 0x00, 0x23 },
+	{ 0xA5, 0x9B, 0x78, 0xC4, 0x90, 0x57, 0x44, 0x07, 0x6B, 0xFE, 0xE8, 0x94, 0xDE, 0x70, 0x7D, 0x4F, 0x12, 0x0B, 0x5C, 0x68, 0x93, 0xEA, 0x04, 0x00, 0x29, 0x7D, 0x0B, 0xB8, 0x34, 0x72, 0x76, 0x32 },
+	{ 0x59, 0xDC, 0x78, 0xB1, 0x05, 0x64, 0x97, 0x07, 0xA2, 0xBB, 0x44, 0x19, 0xC4, 0x8F, 0x00, 0x54, 0x00, 0xD3, 0x97, 0x3D, 0xE3, 0x73, 0x66, 0x10, 0x23, 0x04, 0x35, 0xB1, 0x04, 0x24, 0xB2, 0x4F },
+	{ 0xC0, 0x14, 0x9D, 0x1D, 0x7E, 0x7A, 0x63, 0x53, 0xA6, 0xD9, 0x06, 0xEF, 0xE7, 0x28, 0xF2, 0xF3, 0x29, 0xFE, 0x14, 0xA4, 0x14, 0x9A, 0x3E, 0xA7, 0x76, 0x09, 0xBC, 0x42, 0xB9, 0x75, 0xDD, 0xFA },
+	{ 0xA3, 0x2F, 0x24, 0x14, 0x74, 0xA6, 0xC1, 0x69, 0x32, 0xE9, 0x24, 0x3B, 0xE0, 0xCF, 0x09, 0xBC, 0xDC, 0x7E, 0x0C, 0xA0, 0xE7, 0xA6, 0xA1, 0xB9, 0xB1, 0xA0, 0xF0, 0x1E, 0x41, 0x50, 0x23, 0x77 },
+	{ 0xB2, 0x39, 0xB2, 0xE4, 0xF8, 0x18, 0x41, 0x36, 0x1C, 0x13, 0x39, 0xF6, 0x8E, 0x2C, 0x35, 0x9F, 0x92, 0x9A, 0xF9, 0xAD, 0x9F, 0x34, 0xE0, 0x1A, 0xAB, 0x46, 0x31, 0xAD, 0x6D, 0x55, 0x00, 0xB0 },
+	{ 0x85, 0xFB, 0x41, 0x9C, 0x70, 0x02, 0xA3, 0xE0, 0xB4, 0xB6, 0xEA, 0x09, 0x3B, 0x4C, 0x1A, 0xC6, 0x93, 0x66, 0x45, 0xB6, 0x5D, 0xAC, 0x5A, 0xC1, 0x5A, 0x85, 0x28, 0xB7, 0xB9, 0x4C, 0x17, 0x54 },
+	{ 0x96, 0x19, 0x72, 0x06, 0x25, 0xF1, 0x90, 0xB9, 0x3A, 0x3F, 0xAD, 0x18, 0x6A, 0xB3, 0x14, 0x18, 0x96, 0x33, 0xC0, 0xD3, 0xA0, 0x1E, 0x6F, 0x9B, 0xC8, 0xC4, 0xA8, 0xF8, 0x2F, 0x38, 0x3D, 0xBF },
+	{ 0x7D, 0x62, 0x0D, 0x90, 0xFE, 0x69, 0xFA, 0x46, 0x9A, 0x65, 0x38, 0x38, 0x89, 0x70, 0xA1, 0xAA, 0x09, 0xBB, 0x48, 0xA2, 0xD5, 0x9B, 0x34, 0x7B, 0x97, 0xE8, 0xCE, 0x71, 0xF4, 0x8C, 0x7F, 0x46 },
+	{ 0x29, 0x43, 0x83, 0x56, 0x85, 0x96, 0xFB, 0x37, 0xC7, 0x5B, 0xBA, 0xCD, 0x97, 0x9C, 0x5F, 0xF6, 0xF2, 0x0A, 0x55, 0x6B, 0xF8, 0x87, 0x9C, 0xC7, 0x29, 0x24, 0x85, 0x5D, 0xF9, 0xB8, 0x24, 0x0E },
+	{ 0x16, 0xB1, 0x8A, 0xB3, 0x14, 0x35, 0x9C, 0x2B, 0x83, 0x3C, 0x1C, 0x69, 0x86, 0xD4, 0x8C, 0x55, 0xA9, 0xFC, 0x97, 0xCD, 0xE9, 0xA3, 0xC1, 0xF1, 0x0A, 0x31, 0x77, 0x14, 0x0F, 0x73, 0xF7, 0x38 },
+	{ 0x8C, 0xBB, 0xDD, 0x14, 0xBC, 0x33, 0xF0, 0x4C, 0xF4, 0x58, 0x13, 0xE4, 0xA1, 0x53, 0xA2, 0x73, 0xD3, 0x6A, 0xDA, 0xD5, 0xCE, 0x71, 0xF4, 0x99, 0xEE, 0xB8, 0x7F, 0xB8, 0xAC, 0x63, 0xB7, 0x29 },
+	{ 0x69, 0xC9, 0xA4, 0x98, 0xDB, 0x17, 0x4E, 0xCA, 0xEF, 0xCC, 0x5A, 0x3A, 0xC9, 0xFD, 0xED, 0xF0, 0xF8, 0x13, 0xA5, 0xBE, 0xC7, 0x27, 0xF1, 0xE7, 0x75, 0xBA, 0xBD, 0xEC, 0x77, 0x18, 0x81, 0x6E },
+	{ 0xB4, 0x62, 0xC3, 0xBE, 0x40, 0x44, 0x8F, 0x1D, 0x4F, 0x80, 0x62, 0x62, 0x54, 0xE5, 0x35, 0xB0, 0x8B, 0xC9, 0xCD, 0xCF, 0xF5, 0x99, 0xA7, 0x68, 0x57, 0x8D, 0x4B, 0x28, 0x81, 0xA8, 0xE3, 0xF0 },
+	{ 0x55, 0x3E, 0x9D, 0x9C, 0x5F, 0x36, 0x0A, 0xC0, 0xB7, 0x4A, 0x7D, 0x44, 0xE5, 0xA3, 0x91, 0xDA, 0xD4, 0xCE, 0xD0, 0x3E, 0x0C, 0x24, 0x18, 0x3B, 0x7E, 0x8E, 0xCA, 0xBD, 0xF1, 0x71, 0x5A, 0x64 },
+	{ 0x7A, 0x7C, 0x55, 0xA5, 0x6F, 0xA9, 0xAE, 0x51, 0xE6, 0x55, 0xE0, 0x19, 0x75, 0xD8, 0xA6, 0xFF, 0x4A, 0xE9, 0xE4, 0xB4, 0x86, 0xFC, 0xBE, 0x4E, 0xAC, 0x04, 0x45, 0x88, 0xF2, 0x45, 0xEB, 0xEA },
+	{ 0x2A, 0xFD, 0xF3, 0xC8, 0x2A, 0xBC, 0x48, 0x67, 0xF5, 0xDE, 0x11, 0x12, 0x86, 0xC2, 0xB3, 0xBE, 0x7D, 0x6E, 0x48, 0x65, 0x7B, 0xA9, 0x23, 0xCF, 0xBF, 0x10, 0x1A, 0x6D, 0xFC, 0xF9, 0xDB, 0x9A },
+	{ 0x41, 0x03, 0x7D, 0x2E, 0xDC, 0xDC, 0xE0, 0xC4, 0x9B, 0x7F, 0xB4, 0xA6, 0xAA, 0x09, 0x99, 0xCA, 0x66, 0x97, 0x6C, 0x74, 0x83, 0xAF, 0xE6, 0x31, 0xD4, 0xED, 0xA2, 0x83, 0x14, 0x4F, 0x6D, 0xFC },
+	{ 0xC4, 0x46, 0x6F, 0x84, 0x97, 0xCA, 0x2E, 0xEB, 0x45, 0x83, 0xA0, 0xB0, 0x8E, 0x9D, 0x9A, 0xC7, 0x43, 0x95, 0x70, 0x9F, 0xDA, 0x10, 0x9D, 0x24, 0xF2, 0xE4, 0x46, 0x21, 0x96, 0x77, 0x9C, 0x5D },
+	{ 0x75, 0xF6, 0x09, 0x33, 0x8A, 0xA6, 0x7D, 0x96, 0x9A, 0x2A, 0xE2, 0xA2, 0x36, 0x2B, 0x2D, 0xA9, 0xD7, 0x7C, 0x69, 0x5D, 0xFD, 0x1D, 0xF7, 0x22, 0x4A, 0x69, 0x01, 0xDB, 0x93, 0x2C, 0x33, 0x64 },
+	{ 0x68, 0x60, 0x6C, 0xEB, 0x98, 0x9D, 0x54, 0x88, 0xFC, 0x7C, 0xF6, 0x49, 0xF3, 0xD7, 0xC2, 0x72, 0xEF, 0x05, 0x5D, 0xA1, 0xA9, 0x3F, 0xAE, 0xCD, 0x55, 0xFE, 0x06, 0xF6, 0x96, 0x70, 0x98, 0xCA },
+	{ 0x44, 0x34, 0x6B, 0xDE, 0xB7, 0xE0, 0x52, 0xF6, 0x25, 0x50, 0x48, 0xF0, 0xD9, 0xB4, 0x2C, 0x42, 0x5B, 0xAB, 0x9C, 0x3D, 0xD2, 0x41, 0x68, 0x21, 0x2C, 0x3E, 0xCF, 0x1E, 0xBF, 0x34, 0xE6, 0xAE },
+	{ 0x8E, 0x9C, 0xF6, 0xE1, 0xF3, 0x66, 0x47, 0x1F, 0x2A, 0xC7, 0xD2, 0xEE, 0x9B, 0x5E, 0x62, 0x66, 0xFD, 0xA7, 0x1F, 0x8F, 0x2E, 0x41, 0x09, 0xF2, 0x23, 0x7E, 0xD5, 0xF8, 0x81, 0x3F, 0xC7, 0x18 },
+	{ 0x84, 0xBB, 0xEB, 0x84, 0x06, 0xD2, 0x50, 0x95, 0x1F, 0x8C, 0x1B, 0x3E, 0x86, 0xA7, 0xC0, 0x10, 0x08, 0x29, 0x21, 0x83, 0x3D, 0xFD, 0x95, 0x55, 0xA2, 0xF9, 0x09, 0xB1, 0x08, 0x6E, 0xB4, 0xB8 },
+	{ 0xEE, 0x66, 0x6F, 0x3E, 0xEF, 0x0F, 0x7E, 0x2A, 0x9C, 0x22, 0x29, 0x58, 0xC9, 0x7E, 0xAF, 0x35, 0xF5, 0x1C, 0xED, 0x39, 0x3D, 0x71, 0x44, 0x85, 0xAB, 0x09, 0xA0, 0x69, 0x34, 0x0F, 0xDF, 0x88 },
+	{ 0xC1, 0x53, 0xD3, 0x4A, 0x65, 0xC4, 0x7B, 0x4A, 0x62, 0xC5, 0xCA, 0xCF, 0x24, 0x01, 0x09, 0x75, 0xD0, 0x35, 0x6B, 0x2F, 0x32, 0xC8, 0xF5, 0xDA, 0x53, 0x0D, 0x33, 0x88, 0x16, 0xAD, 0x5D, 0xE6 },
+	{ 0x9F, 0xC5, 0x45, 0x01, 0x09, 0xE1, 0xB7, 0x79, 0xF6, 0xC7, 0xAE, 0x79, 0xD5, 0x6C, 0x27, 0x63, 0x5C, 0x8D, 0xD4, 0x26, 0xC5, 0xA9, 0xD5, 0x4E, 0x25, 0x78, 0xDB, 0x98, 0x9B, 0x8C, 0x3B, 0x4E },
+	{ 0xD1, 0x2B, 0xF3, 0x73, 0x2E, 0xF4, 0xAF, 0x5C, 0x22, 0xFA, 0x90, 0x35, 0x6A, 0xF8, 0xFC, 0x50, 0xFC, 0xB4, 0x0F, 0x8F, 0x2E, 0xA5, 0xC8, 0x59, 0x47, 0x37, 0xA3, 0xB3, 0xD5, 0xAB, 0xDB, 0xD7 },
+	{ 0x11, 0x03, 0x0B, 0x92, 0x89, 0xBB, 0xA5, 0xAF, 0x65, 0x26, 0x06, 0x72, 0xAB, 0x6F, 0xEE, 0x88, 0xB8, 0x74, 0x20, 0xAC, 0xEF, 0x4A, 0x17, 0x89, 0xA2, 0x07, 0x3B, 0x7E, 0xC2, 0xF2, 0xA0, 0x9E },
+	{ 0x69, 0xCB, 0x19, 0x2B, 0x84, 0x44, 0x00, 0x5C, 0x8C, 0x0C, 0xEB, 0x12, 0xC8, 0x46, 0x86, 0x07, 0x68, 0x18, 0x8C, 0xDA, 0x0A, 0xEC, 0x27, 0xA9, 0xC8, 0xA5, 0x5C, 0xDE, 0xE2, 0x12, 0x36, 0x32 },
+	{ 0xDB, 0x44, 0x4C, 0x15, 0x59, 0x7B, 0x5F, 0x1A, 0x03, 0xD1, 0xF9, 0xED, 0xD1, 0x6E, 0x4A, 0x9F, 0x43, 0xA6, 0x67, 0xCC, 0x27, 0x51, 0x75, 0xDF, 0xA2, 0xB7, 0x04, 0xE3, 0xBB, 0x1A, 0x9B, 0x83 },
+	{ 0x3F, 0xB7, 0x35, 0x06, 0x1A, 0xBC, 0x51, 0x9D, 0xFE, 0x97, 0x9E, 0x54, 0xC1, 0xEE, 0x5B, 0xFA, 0xD0, 0xA9, 0xD8, 0x58, 0xB3, 0x31, 0x5B, 0xAD, 0x34, 0xBD, 0xE9, 0x99, 0xEF, 0xD7, 0x24, 0xDD }
+};
+
+bool __init blake2s_selftest(void)
+{
+	u8 key[BLAKE2S_KEYBYTES];
+	u8 buf[ARRAY_SIZE(blake2s_testvecs)];
+	u8 hash[BLAKE2S_OUTBYTES];
+	size_t i;
+	bool success = true;
+
+	for (i = 0; i < BLAKE2S_KEYBYTES; ++i)
+		key[i] = (u8)i;
+
+	for (i = 0; i < ARRAY_SIZE(blake2s_testvecs); ++i)
+		buf[i] = (u8)i;
+
+	for (i = 0; i < ARRAY_SIZE(blake2s_keyed_testvecs); ++i) {
+		blake2s(hash, buf, key, BLAKE2S_OUTBYTES, i, BLAKE2S_KEYBYTES);
+		if (memcmp(hash, blake2s_keyed_testvecs[i], BLAKE2S_OUTBYTES)) {
+			pr_info("blake2s keyed self-test %zu: FAIL\n", i + 1);
+			success = false;
+		}
+	}
+
+	for (i = 0; i < ARRAY_SIZE(blake2s_testvecs); ++i) {
+		blake2s(hash, buf, NULL, BLAKE2S_OUTBYTES, i, 0);
+		if (memcmp(hash, blake2s_testvecs[i], BLAKE2S_OUTBYTES)) {
+			pr_info("blake2s unkeyed self-test %zu: FAIL\n", i + i);
+			success = false;
+		}
+	}
+
+	if (success)
+		pr_info("blake2s self-tests: pass\n");
+	return success;
+}
+#endif
diff --git b/net/wireguard/selftest/chacha20poly1305.h b/net/wireguard/selftest/chacha20poly1305.h
new file mode 100644
index 0000000..71f178e
--- /dev/null
+++ b/net/wireguard/selftest/chacha20poly1305.h
@@ -0,0 +1,333 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#ifdef DEBUG
+struct chacha20poly1305_testvec {
+	u8 *key, *nonce, *assoc, *input, *result;
+	size_t alen, ilen;
+};
+
+/* The first of these are the ChaCha20-Poly1305 AEAD test vectors from RFC7539 2.8.2. The rest are generated by:
+ * #!/usr/bin/env python3
+ *
+ * from cryptography.hazmat.primitives.ciphers.aead import ChaCha20Poly1305
+ * import os
+ *
+ * def encode_blob(blob):
+ *     a = ""
+ *     for i in blob:
+ *         a += "\\x" + hex(i)[2:]
+ *     return a
+ *
+ * enc = [ ]
+ * dec = [ ]
+ *
+ * def make_vector(plen, adlen):
+ *     key = os.urandom(32)
+ *     nonce = os.urandom(8)
+ *     p = os.urandom(plen)
+ *     ad = os.urandom(adlen)
+ *     c = ChaCha20Poly1305(key).encrypt(nonce=bytes(4) + nonce, data=p, associated_data=ad)
+ *
+ *     out = "{\n"
+ *     out += "\t.key\t= \"" + encode_blob(key) + "\",\n"
+ *     out += "\t.nonce\t= \"" + encode_blob(nonce) + "\",\n"
+ *     out += "\t.assoc\t= \"" + encode_blob(ad) + "\",\n"
+ *     out += "\t.alen\t= " + str(len(ad)) + ",\n"
+ *     out += "\t.input\t= \"" + encode_blob(p) + "\",\n"
+ *     out += "\t.ilen\t= " + str(len(p)) + ",\n"
+ *     out += "\t.result\t= \"" + encode_blob(c) + "\"\n"
+ *     out += "}"
+ *     enc.append(out)
+ *
+ *
+ *     out = "{\n"
+ *     out += "\t.key\t= \"" + encode_blob(key) + "\",\n"
+ *     out += "\t.nonce\t= \"" + encode_blob(nonce) + "\",\n"
+ *     out += "\t.assoc\t= \"" + encode_blob(ad) + "\",\n"
+ *     out += "\t.alen\t= " + str(len(ad)) + ",\n"
+ *     out += "\t.input\t= \"" + encode_blob(c) + "\",\n"
+ *     out += "\t.ilen\t= " + str(len(c)) + ",\n"
+ *     out += "\t.result\t= \"" + encode_blob(p) + "\"\n"
+ *     out += "}"
+ *     dec.append(out)
+ *
+ *
+ * make_vector(0, 0)
+ * make_vector(0, 8)
+ * make_vector(1, 8)
+ * make_vector(1, 0)
+ * make_vector(129, 7)
+ * make_vector(256, 0)
+ * make_vector(512, 0)
+ * make_vector(513, 9)
+ * make_vector(1024, 16)
+ * make_vector(1933, 7)
+ * make_vector(2011, 63)
+ *
+ * print("======== encryption vectors ========")
+ * print(", ".join(enc))
+ *
+ * print("\n\n\n======== decryption vectors ========")
+ * print(", ".join(dec))
+ */
+
+static const struct chacha20poly1305_testvec chacha20poly1305_enc_vectors[] __initconst = { {
+	.key	= "\x1c\x92\x40\xa5\xeb\x55\xd3\x8a\xf3\x33\x88\x86\x04\xf6\xb5\xf0\x47\x39\x17\xc1\x40\x2b\x80\x09\x9d\xca\x5c\xbc\x20\x70\x75\xc0",
+	.nonce	= "\x01\x02\x03\x04\x05\x06\x07\x08",
+	.assoc	= "\xf3\x33\x88\x86\x00\x00\x00\x00\x00\x00\x4e\x91",
+	.alen	= 12,
+	.input	= "\x49\x6e\x74\x65\x72\x6e\x65\x74\x2d\x44\x72\x61\x66\x74\x73\x20\x61\x72\x65\x20\x64\x72\x61\x66\x74\x20\x64\x6f\x63\x75\x6d\x65\x6e\x74\x73\x20\x76\x61\x6c\x69\x64\x20\x66\x6f\x72\x20\x61\x20\x6d\x61\x78\x69\x6d\x75\x6d\x20\x6f\x66\x20\x73\x69\x78\x20\x6d\x6f\x6e\x74\x68\x73\x20\x61\x6e\x64\x20\x6d\x61\x79\x20\x62\x65\x20\x75\x70\x64\x61\x74\x65\x64\x2c\x20\x72\x65\x70\x6c\x61\x63\x65\x64\x2c\x20\x6f\x72\x20\x6f\x62\x73\x6f\x6c\x65\x74\x65\x64\x20\x62\x79\x20\x6f\x74\x68\x65\x72\x20\x64\x6f\x63\x75\x6d\x65\x6e\x74\x73\x20\x61\x74\x20\x61\x6e\x79\x20\x74\x69\x6d\x65\x2e\x20\x49\x74\x20\x69\x73\x20\x69\x6e\x61\x70\x70\x72\x6f\x70\x72\x69\x61\x74\x65\x20\x74\x6f\x20\x75\x73\x65\x20\x49\x6e\x74\x65\x72\x6e\x65\x74\x2d\x44\x72\x61\x66\x74\x73\x20\x61\x73\x20\x72\x65\x66\x65\x72\x65\x6e\x63\x65\x20\x6d\x61\x74\x65\x72\x69\x61\x6c\x20\x6f\x72\x20\x74\x6f\x20\x63\x69\x74\x65\x20\x74\x68\x65\x6d\x20\x6f\x74\x68\x65\x72\x20\x74\x68\x61\x6e\x20\x61\x73\x20\x2f\xe2\x80\x9c\x77\x6f\x72\x6b\x20\x69\x6e\x20\x70\x72\x6f\x67\x72\x65\x73\x73\x2e\x2f\xe2\x80\x9d",
+	.ilen	= 265,
+	.result	= "\x64\xa0\x86\x15\x75\x86\x1a\xf4\x60\xf0\x62\xc7\x9b\xe6\x43\xbd\x5e\x80\x5c\xfd\x34\x5c\xf3\x89\xf1\x08\x67\x0a\xc7\x6c\x8c\xb2\x4c\x6c\xfc\x18\x75\x5d\x43\xee\xa0\x9e\xe9\x4e\x38\x2d\x26\xb0\xbd\xb7\xb7\x3c\x32\x1b\x01\x00\xd4\xf0\x3b\x7f\x35\x58\x94\xcf\x33\x2f\x83\x0e\x71\x0b\x97\xce\x98\xc8\xa8\x4a\xbd\x0b\x94\x81\x14\xad\x17\x6e\x00\x8d\x33\xbd\x60\xf9\x82\xb1\xff\x37\xc8\x55\x97\x97\xa0\x6e\xf4\xf0\xef\x61\xc1\x86\x32\x4e\x2b\x35\x06\x38\x36\x06\x90\x7b\x6a\x7c\x02\xb0\xf9\xf6\x15\x7b\x53\xc8\x67\xe4\xb9\x16\x6c\x76\x7b\x80\x4d\x46\xa5\x9b\x52\x16\xcd\xe7\xa4\xe9\x90\x40\xc5\xa4\x04\x33\x22\x5e\xe2\x82\xa1\xb0\xa0\x6c\x52\x3e\xaf\x45\x34\xd7\xf8\x3f\xa1\x15\x5b\x00\x47\x71\x8c\xbc\x54\x6a\x0d\x07\x2b\x04\xb3\x56\x4e\xea\x1b\x42\x22\x73\xf5\x48\x27\x1a\x0b\xb2\x31\x60\x53\xfa\x76\x99\x19\x55\xeb\xd6\x31\x59\x43\x4e\xce\xbb\x4e\x46\x6d\xae\x5a\x10\x73\xa6\x72\x76\x27\x09\x7a\x10\x49\xe6\x17\xd9\x1d\x36\x10\x94\xfa\x68\xf0\xff\x77\x98\x71\x30\x30\x5b\xea\xba\x2e\xda\x04\xdf\x99\x7b\x71\x4d\x6c\x6f\x2c\x29\xa6\xad\x5c\xb4\x02\x2b\x02\x70\x9b\xee\xad\x9d\x67\x89\x0c\xbb\x22\x39\x23\x36\xfe\xa1\x85\x1f\x38"
+}, {
+	.key	= "\x4c\xf5\x96\x83\x38\xe6\xae\x7f\x2d\x29\x25\x76\xd5\x75\x27\x86\x91\x9a\x27\x7a\xfb\x46\xc5\xef\x94\x81\x79\x57\x14\x59\x40\x68",
+	.nonce	= "\xca\xbf\x33\x71\x32\x45\x77\x8e",
+	.assoc	= "",
+	.alen	= 0,
+	.input	= "",
+	.ilen	= 0,
+	.result	= "\xea\xe0\x1e\x9e\x2c\x91\xaa\xe1\xdb\x5d\x99\x3f\x8a\xf7\x69\x92"
+}, {
+	.key	= "\x2d\xb0\x5d\x40\xc8\xed\x44\x88\x34\xd1\x13\xaf\x57\xa1\xeb\x3a\x2a\x80\x51\x36\xec\x5b\xbc\x8\x93\x84\x21\xb5\x13\x88\x3c\xd",
+	.nonce	= "\x3d\x86\xb5\x6b\xc8\xa3\x1f\x1d",
+	.assoc	= "\x33\x10\x41\x12\x1f\xf3\xd2\x6b",
+	.alen	= 8,
+	.input	= "",
+	.ilen	= 0,
+	.result	= "\xdd\x6b\x3b\x82\xce\x5a\xbd\xd6\xa9\x35\x83\xd8\x8c\x3d\x85\x77"
+}, {
+	.key	= "\x4b\x28\x4b\xa3\x7b\xbe\xe9\xf8\x31\x80\x82\xd7\xd8\xe8\xb5\xa1\xe2\x18\x18\x8a\x9c\xfa\xa3\x3d\x25\x71\x3e\x40\xbc\x54\x7a\x3e",
+	.nonce	= "\xd2\x32\x1f\x29\x28\xc6\xc4\xc4",
+	.assoc	= "\x6a\xe2\xad\x3f\x88\x39\x5a\x40",
+	.alen	= 8,
+	.input	= "\xa4",
+	.ilen	= 1,
+	.result	= "\xb7\x1b\xb0\x73\x59\xb0\x84\xb2\x6d\x8e\xab\x94\x31\xa1\xae\xac\x89"
+}, {
+	.key	= "\x66\xca\x9c\x23\x2a\x4b\x4b\x31\xe\x92\x89\x8b\xf4\x93\xc7\x87\x98\xa3\xd8\x39\xf8\xf4\xa7\x1\xc0\x2e\xa\xa6\x7e\x5a\x78\x87",
+	.nonce	= "\x20\x1c\xaa\x5f\x9c\xbf\x92\x30",
+	.assoc	= "",
+	.alen	= 0,
+	.input	= "\x2d",
+	.ilen	= 1,
+	.result	= "\xbf\xe1\x5b\xb\xdb\x6b\xf5\x5e\x6c\x5d\x84\x44\x39\x81\xc1\x9c\xac"
+}, {
+	.key	= "\x68\x7b\x8d\x8e\xe3\xc4\xdd\xae\xdf\x72\x7f\x53\x72\x25\x1e\x78\x91\xcb\x69\x76\x1f\x49\x93\xf9\x6f\x21\xcc\x39\x9c\xad\xb1\x1",
+	.nonce	= "\xdf\x51\x84\x82\x42\xc\x75\x9c",
+	.assoc	= "\x70\xd3\x33\xf3\x8b\x18\xb",
+	.alen	= 7,
+	.input	= "\x33\x2f\x94\xc1\xa4\xef\xcc\x2a\x5b\xa6\xe5\x8f\x1d\x40\xf0\x92\x3c\xd9\x24\x11\xa9\x71\xf9\x37\x14\x99\xfa\xbe\xe6\x80\xde\x50\xc9\x96\xd4\xb0\xec\x9e\x17\xec\xd2\x5e\x72\x99\xfc\xa\xe1\xcb\x48\xd2\x85\xdd\x2f\x90\xe0\x66\x3b\xe6\x20\x74\xbe\x23\x8f\xcb\xb4\xe4\xda\x48\x40\xa6\xd1\x1b\xc7\x42\xce\x2f\xc\xa6\x85\x6e\x87\x37\x3\xb1\x7c\x25\x96\xa3\x5\xd8\xb0\xf4\xed\xea\xc2\xf0\x31\x98\x6c\xd1\x14\x25\xc0\xcb\x1\x74\xd0\x82\xf4\x36\xf5\x41\xd5\xdc\xca\xc5\xbb\x98\xfe\xfc\x69\x21\x70\xd8\xa4\x4b\xc8\xde\x8f",
+	.ilen	= 129,
+	.result	= "\x8b\x6\xd3\x31\xb0\x93\x45\xb1\x75\x6e\x26\xf9\x67\xbc\x90\x15\x81\x2c\xb5\xf0\xc6\x2b\xc7\x8c\x56\xd1\xbf\x69\x6c\x7\xa0\xda\x65\x27\xc9\x90\x3d\xef\x4b\x11\xf\x19\x7\xfd\x29\x92\xd9\xc8\xf7\x99\x2e\x4a\xd0\xb8\x2c\xdc\x93\xf5\x9e\x33\x78\xd1\x37\xc3\x66\xd7\x5e\xbc\x44\xbf\x53\xa5\xbc\xc4\xcb\x7b\x3a\x8e\x7f\x2\xbd\xbb\xe7\xca\xa6\x6c\x6b\x93\x21\x93\x10\x61\xe7\x69\xd0\x78\xf3\x7\x5a\x1a\x8f\x73\xaa\xb1\x4e\xd3\xda\x4f\xf3\x32\xe1\x66\x3e\x6c\xc6\x13\xba\x6\x5b\xfc\x6a\xe5\x6f\x60\xfb\x7\x40\xb0\x8c\x9d\x84\x43\x6b\xc1\xf7\x8d\x8d\x31\xf7\x7a\x39\x4d\x8f\x9a\xeb"
+}, {
+	.key	= "\x8d\xb8\x91\x48\xf0\xe7\xa\xbd\xf9\x3f\xcd\xd9\xa0\x1e\x42\x4c\xe7\xde\x25\x3d\xa3\xd7\x5\x80\x8d\xf2\x82\xac\x44\x16\x51\x1",
+	.nonce	= "\xde\x7b\xef\xc3\x65\x1b\x68\xb0",
+	.assoc	= "",
+	.alen	= 0,
+	.input	= "\x9b\x18\xdb\xdd\x9a\xf\x3e\xa5\x15\x17\xde\xdf\x8\x9d\x65\xa\x67\x30\x12\xe2\x34\x77\x4b\xc1\xd9\xc6\x1f\xab\xc6\x18\x50\x17\xa7\x9d\x3c\xa6\xc5\x35\x8c\x1c\xc0\xa1\x7c\x9f\x3\x89\xca\xe1\xe6\xe9\xd4\xd3\x88\xdb\xb4\x51\x9d\xec\xb4\xfc\x52\xee\x6d\xf1\x75\x42\xc6\xfd\xbd\x7a\x8e\x86\xfc\x44\xb3\x4f\xf3\xea\x67\x5a\x41\x13\xba\xb0\xdc\xe1\xd3\x2a\x7c\x22\xb3\xca\xac\x6a\x37\x98\x3e\x1d\x40\x97\xf7\x9b\x1d\x36\x6b\xb3\x28\xbd\x60\x82\x47\x34\xaa\x2f\x7d\xe9\xa8\x70\x81\x57\xd4\xb9\x77\xa\x9d\x29\xa7\x84\x52\x4f\xc2\x4a\x40\x3b\x3c\xd4\xc9\x2a\xdb\x4a\x53\xc4\xbe\x80\xe9\x51\x7f\x8f\xc7\xa2\xce\x82\x5c\x91\x1e\x74\xd9\xd0\xbd\xd5\xf3\xfd\xda\x4d\x25\xb4\xbb\x2d\xac\x2f\x3d\x71\x85\x7b\xcf\x3c\x7b\x3e\xe\x22\x78\xc\x29\xbf\xe4\xf4\x57\xb3\xcb\x49\xa0\xfc\x1e\x5\x4e\x16\xbc\xd5\xa8\xa3\xee\x5\x35\xc6\x7c\xab\x60\x14\x55\x1a\x8e\xc5\x88\x5d\xd5\x81\xc2\x81\xa5\xc4\x60\xdb\xaf\x77\x91\xe1\xce\xa2\x7e\x7f\x42\xe3\xb0\x13\x1c\x1f\x25\x60\x21\xe2\x40\x5f\x99\xb7\x73\xec\x9b\x2b\xf0\x65\x11\xc8\xd0\xa\x9f\xd3",
+	.ilen	= 256,
+	.result	= "\x85\x4\xc2\xed\x8d\xfd\x97\x5c\xd2\xb7\xe2\xc1\x6b\xa3\xba\xf8\xc9\x50\xc3\xc6\xa5\xe3\xa4\x7c\xc3\x23\x49\x5e\xa9\xb9\x32\xeb\x8a\x7c\xca\xe5\xec\xfb\x7c\xc0\xcb\x7d\xdc\x2c\x9d\x92\x55\x21\xa\xc8\x43\x63\x59\xa\x31\x70\x82\x67\x41\x3\xf8\xdf\xf2\xac\xa7\x2\xd4\xd5\x8a\x2d\xc8\x99\x19\x66\xd0\xf6\x88\x2c\x77\xd9\xd4\xd\x6c\xbd\x98\xde\xe7\x7f\xad\x7e\x8a\xfb\xe9\x4b\xe5\xf7\xe5\x50\xa0\x90\x3f\xd6\x22\x53\xe3\xfe\x1b\xcc\x79\x3b\xec\x12\x47\x52\xa7\xd6\x4\xe3\x52\xe6\x93\x90\x91\x32\x73\x79\xb8\xd0\x31\xde\x1f\x9f\x2f\x5\x38\x54\x2f\x35\x4\x39\xe0\xa7\xba\xc6\x52\xf6\x37\x65\x4c\x7\xa9\x7e\xb3\x21\x6f\x74\x8c\xc9\xde\xdb\x65\x1b\x9b\xaa\x60\xb1\x3\x30\x6b\xb2\x3\xc4\x1c\x4\xf8\xf\x64\xaf\x46\xe4\x65\x99\x49\xe2\xea\xce\x78\x0\xd8\x8b\xd5\x2e\xcf\xfc\x40\x49\xe8\x58\xdc\x34\x9c\x8c\x61\xbf\xa\x8e\xec\x39\xa9\x30\x5\x5a\xd2\x56\x1\xc7\xda\x8f\x4e\xbb\x43\xa3\x3a\xf9\x15\x2a\xd0\xa0\x7a\x87\x34\x82\xfe\x8a\xd1\x2d\x5e\xc7\xbf\x4\x53\x5f\x3b\x36\xd4\x25\x5c\x34\x7a\x8d\xd5\x5\xce\x72\xca\xef\x7a\x4b\xbc\xb0\x10\x5c\x96\x42\x3a\x0\x98\xcd\x15\xe8\xb7\x53"
+}, {
+	.key	= "\xf2\xaa\x4f\x99\xfd\x3e\xa8\x53\xc1\x44\xe9\x81\x18\xdc\xf5\xf0\x3e\x44\x15\x59\xe0\xc5\x44\x86\xc3\x91\xa8\x75\xc0\x12\x46\xba",
+	.nonce	= "\xe\xd\x57\xbb\x7b\x40\x54\x2",
+	.assoc	= "",
+	.alen	= 0,
+	.input	= "\xc3\x9\x94\x62\xe6\x46\x2e\x10\xbe\x0\xe4\xfc\xf3\x40\xa3\xe2\xf\xc2\x8b\x28\xdc\xba\xb4\x3c\xe4\x21\x58\x61\xcd\x8b\xcd\xfb\xac\x94\xa1\x45\xf5\x1c\xe1\x12\xe0\x3b\x67\x21\x54\x5e\x8c\xaa\xcf\xdb\xb4\x51\xd4\x13\xda\xe6\x83\x89\xb6\x92\xe9\x21\x76\xa4\x93\x7d\xe\xfd\x96\x36\x3\x91\x43\x5c\x92\x49\x62\x61\x7b\xeb\x43\x89\xb8\x12\x20\x43\xd4\x47\x6\x84\xee\x47\xe9\x8a\x73\x15\xf\x72\xcf\xed\xce\x96\xb2\x7f\x21\x45\x76\xeb\x26\x28\x83\x6a\xad\xaa\xa6\x81\xd8\x55\xb1\xa3\x85\xb3\xc\xdf\xf1\x69\x2d\x97\x5\x2a\xbc\x7c\x7b\x25\xf8\x80\x9d\x39\x25\xf3\x62\xf0\x66\x5e\xf4\xa0\xcf\xd8\xfd\x4f\xb1\x1f\x60\x3a\x8\x47\xaf\xe1\xf6\x10\x77\x9\xa7\x27\x8f\x9a\x97\x5a\x26\xfa\xfe\x41\x32\x83\x10\xe0\x1d\xbf\x64\xd\xf4\x1c\x32\x35\xe5\x1b\x36\xef\xd4\x4a\x93\x4d\x0\x7c\xec\x2\x7\x8b\x5d\x7d\x1b\xe\xd1\xa6\xa5\x5d\x7d\x57\x88\xa8\xcc\x81\xb4\x86\x4e\xb4\x40\xe9\x1d\xc3\xb1\x24\x3e\x7f\xcc\x8a\x24\x9b\xdf\x6d\xf0\x39\x69\x3e\x4c\xc0\x96\xe4\x13\xda\x90\xda\xf4\x95\x66\x8b\x17\x17\xfe\x39\x43\x25\xaa\xda\xa0\x43\x3c\xb1\x41\x2\xa3\xf0\xa7\x19\x59\xbc\x1d\x7d\x6c\x6d\x91\x9\x5c\xb7\x5b\x1\xd1\x6f\x17\x21\x97\xbf\x89\x71\xa5\xb0\x6e\x7\x45\xfd\x9d\xea\x7\xf6\x7a\x9f\x10\x18\x22\x30\x73\xac\xd4\x6b\x72\x44\xed\xd9\x19\x9b\x2d\x4a\x41\xdd\xd1\x85\x5e\x37\x19\xed\xd2\x15\x8f\x5e\x91\xdb\x33\xf2\xe4\xdb\xff\x98\xfb\xa3\xb5\xca\x21\x69\x8\xe7\x8a\xdf\x90\xff\x3e\xe9\x20\x86\x3c\xe9\xfc\xb\xfe\x5c\x61\xaa\x13\x92\x7f\x7b\xec\xe0\x6d\xa8\x23\x22\xf6\x6b\x77\xc4\xfe\x40\x7\x3b\xb6\xf6\x8e\x5f\xd4\xb9\xb7\xf\x21\x4\xef\x83\x63\x91\x69\x40\xa3\x48\x5c\xd2\x60\xf9\x4f\x6c\x47\x8b\x3b\xb1\x9f\x8e\xee\x16\x8a\x13\xfc\x46\x17\xc3\xc3\x32\x56\xf8\x3c\x85\x3a\xb6\x3e\xaa\x89\x4f\xb3\xdf\x38\xfd\xf1\xe4\x3a\xc0\xe6\x58\xb5\x8f\xc5\x29\xa2\x92\x4a\xb6\xa0\x34\x7f\xab\xb5\x8a\x90\xa1\xdb\x4d\xca\xb6\x2c\x41\x3c\xf7\x2b\x21\xc3\xfd\xf4\x17\x5c\xb5\x33\x17\x68\x2b\x8\x30\xf3\xf7\x30\x3c\x96\xe6\x6a\x20\x97\xe7\x4d\x10\x5f\x47\x5f\x49\x96\x9\xf0\x27\x91\xc8\xf8\x5a\x2e\x79\xb5\xe2\xb8\xe8\xb9\x7b\xd5\x10\xcb\xff\x5d\x14\x73\xf3",
+	.ilen	= 512,
+	.result	= "\x14\xf6\x41\x37\xa6\xd4\x27\xcd\xdb\x6\x3e\x9a\x4e\xab\xd5\xb1\x1e\x6b\xd2\xbc\x11\xf4\x28\x93\x63\x54\xef\xbb\x5e\x1d\x3a\x1d\x37\x3c\xa\x6c\x1e\xc2\xd1\x2c\xb5\xa3\xb5\x7b\xb8\x8f\x25\xa6\x1b\x61\x1c\xec\x28\x58\x26\xa4\xa8\x33\x28\x25\x5c\x45\x5\xe5\x6c\x99\xe5\x45\xc4\xa2\x3\x84\x3\x73\x1e\x8c\x49\xac\x20\xdd\x8d\xb3\xc4\xf5\xe7\x4f\xf1\xed\xa1\x98\xde\xa4\x96\xdd\x2f\xab\xab\x97\xcf\x3e\xd2\x9e\xb8\x13\x7\x28\x29\x19\xaf\xfd\xf2\x49\x43\xea\x49\x26\x91\xc1\x7\xd6\xbb\x81\x75\x35\xd\x24\x7f\xc8\xda\xd4\xb7\xeb\xe8\x5c\x9\xa2\x2f\xdc\x28\x7d\x3a\x3\xfa\x94\xb5\x1d\x17\x99\x36\xc3\x1c\x18\x34\xe3\x9f\xf5\x55\x7c\xb0\x60\x9d\xff\xac\xd4\x61\xf2\xad\xf8\xce\xc7\xbe\x5c\xd2\x95\xa8\x4b\x77\x13\x19\x59\x26\xc9\xb7\x8f\x6a\xcb\x2d\x37\x91\xea\x92\x9c\x94\x5b\xda\xb\xce\xfe\x30\x20\xf8\x51\xad\xf2\xbe\xe7\xc7\xff\xb3\x33\x91\x6a\xc9\x1a\x41\xc9\xf\xf3\x10\xe\xfd\x53\xff\x6c\x16\x52\xd9\xf3\xf7\x98\x2e\xc9\x7\x31\x2c\xc\x72\xd7\xc5\xc6\x8\x2a\x7b\xda\xbd\x7e\x2\xea\x1a\xbb\xf2\x4\x27\x61\x28\x8e\xf5\x4\x3\x1f\x4c\x7\x55\x82\xec\x1e\xd7\x8b\x2f\x65\x56\xd1\xd9\x1e\x3c\xe9\x1f\x5e\x98\x70\x38\x4a\x8c\x49\xc5\x43\xa0\xa1\x8b\x74\x9d\x4c\x62\xd\x10\xc\xf4\x6c\x8f\xe0\xaa\x9a\x8d\xb7\xe0\xbe\x4c\x87\xf1\x98\x2f\xcc\xed\xc0\x52\x29\xdc\x83\xf8\xfc\x2c\xe\xa8\x51\x4d\x80\xd\xa3\xfe\xd8\x37\xe7\x41\x24\xfc\xfb\x75\xe3\x71\x7b\x57\x45\xf5\x97\x73\x65\x63\x14\x74\xb8\x82\x9f\xf8\x60\x2f\x8a\xf2\x4e\xf1\x39\xda\x33\x91\xf8\x36\xe0\x8d\x3f\x1f\x3b\x56\xdc\xa0\x8f\x3c\x9d\x71\x52\xa7\xb8\xc0\xa5\xc6\xa2\x73\xda\xf4\x4b\x74\x5b\x0\x3d\x99\xd7\x96\xba\xe6\xe1\xa6\x96\x38\xad\xb3\xc0\xd2\xba\x91\x6b\xf9\x19\xdd\x3b\xbe\xbe\x9c\x20\x50\xba\xa1\xd0\xce\x11\xbd\x95\xd8\xd1\xdd\x33\x85\x74\xdc\xdb\x66\x76\x44\xdc\x3\x74\x48\x35\x98\xb1\x18\x47\x94\x7d\xff\x62\xe4\x58\x78\xab\xed\x95\x36\xd9\x84\x91\x82\x64\x41\xbb\x58\xe6\x1c\x20\x6d\x15\x6b\x13\x96\xe8\x35\x7f\xdc\x40\x2c\xe9\xbc\x8a\x4f\x92\xec\x6\x2d\x50\xdf\x93\x5d\x65\x5a\xa8\xfc\x20\x50\x14\xa9\x8a\x7e\x1d\x8\x1f\xe2\x99\xd0\xbe\xfb\x3a\x21\x9d\xad\x86\x54\xfd\xd\x98\x1c\x5a\x6f\x1f\x9a\x40\xcd\xa2\xff\x6a\xf1\x54"
+}, {
+	.key	= "\xea\xbc\x56\x99\xe3\x50\xff\xc5\xcc\x1a\xd7\xc1\x57\x72\xea\x86\x5b\x89\x88\x61\x3d\x2f\x9b\xb2\xe7\x9c\xec\x74\x6e\x3e\xf4\x3b",
+	.nonce	= "\xef\x2d\x63\xee\x6b\x80\x8b\x78",
+	.assoc	= "\x5a\x27\xff\xeb\xdf\x84\xb2\x9e\xef",
+	.alen	= 9,
+	.input	= "\xe6\xc3\xdb\x63\x55\x15\xe3\x5b\xb7\x4b\x27\x8b\x5a\xdd\xc2\xe8\x3a\x6b\xd7\x81\x96\x35\x97\xca\xd7\x68\xe8\xef\xce\xab\xda\x9\x6e\xd6\x8e\xcb\x55\xb5\xe1\xe5\x57\xfd\xc4\xe3\xe0\x18\x4f\x85\xf5\x3f\x7e\x4b\x88\xc9\x52\x44\xf\xea\xaf\x1f\x71\x48\x9f\x97\x6d\xb9\x6f\x0\xa6\xde\x2b\x77\x8b\x15\xad\x10\xa0\x2b\x7b\x41\x90\x3\x2d\x69\xae\xcc\x77\x7c\xa5\x9d\x29\x22\xc2\xea\xb4\x0\x1a\xd2\x7a\x98\x8a\xf9\xf7\x82\xb0\xab\xd8\xa6\x94\x8d\x58\x2f\x1\x9e\x0\x20\xfc\x49\xdc\xe\x3\xe8\x45\x10\xd6\xa8\xda\x55\x10\x9a\xdf\x67\x22\x8b\x43\xab\x0\xbb\x2\xc8\xdd\x7b\x97\x17\xd7\x1d\x9e\x2\x5e\x48\xde\x8e\xcf\x99\x7\x95\x92\x3c\x5f\x9f\xc5\x8a\xc0\x23\xaa\xd5\x8c\x82\x6e\x16\x92\xb1\x12\x17\x7\xc3\xfb\x36\xf5\x6c\x35\xd6\x6\x1f\x9f\xa7\x94\xa2\x38\x63\x9c\xb0\x71\xb3\xa5\xd2\xd8\xba\x9f\x8\x1\xb3\xff\x4\x97\x73\x45\x1b\xd5\xa9\x9c\x80\xaf\x4\x9a\x85\xdb\x32\x5b\x5d\x1a\xc1\x36\x28\x10\x79\xf1\x3c\xbf\x1a\x41\x5c\x4e\xdf\xb2\x7c\x79\x3b\x7a\x62\x3d\x4b\xc9\x9b\x2a\x2e\x7c\xa2\xb1\x11\x98\xa7\x34\x1a\x0\xf3\xd1\xbc\x18\x22\xba\x2\x56\x62\x31\x10\x11\x6d\xe0\x54\x9d\x40\x1f\x26\x80\x41\xca\x3f\x68\xf\x32\x1d\xa\x8e\x79\xd8\xa4\x1b\x29\x1c\x90\x8e\xc5\xe3\xb4\x91\x37\x9a\x97\x86\x99\xd5\x9\xc5\xbb\xa3\x3f\x21\x29\x82\x14\x5c\xab\x25\xfb\xf2\x4f\x58\x26\xd4\x83\xaa\x66\x89\x67\x7e\xc0\x49\xe1\x11\x10\x7f\x7a\xda\x29\x4\xff\xf0\xcb\x9\x7c\x9d\xfa\x3\x6f\x81\x9\x31\x60\xfb\x8\xfa\x74\xd3\x64\x44\x7c\x55\x85\xec\x9c\x6e\x25\xb7\x6c\xc5\x37\xb6\x83\x87\x72\x95\x8b\x9d\xe1\x69\x5c\x31\x95\x42\xa6\x2c\xd1\x36\x47\x1f\xec\x54\xab\xa2\x1c\xd8\x0\xcc\xbc\xd\x65\xe2\x67\xbf\xbc\xea\xee\x9e\xe4\x36\x95\xbe\x73\xd9\xa6\xd9\xf\xa0\xcc\x82\x76\x26\xad\x5b\x58\x6c\x4e\xab\x29\x64\xd3\xd9\xa9\x8\x8c\x1d\xa1\x4f\x80\xd8\x3f\x94\xfb\xd3\x7b\xfc\xd1\x2b\xc3\x21\xeb\xe5\x1c\x84\x23\x7f\x4b\xfa\xdb\x34\x18\xa2\xc2\xe5\x13\xfe\x6c\x49\x81\xd2\x73\xe7\xe2\xd7\xe4\x4f\x4b\x8\x6e\xb1\x12\x22\x10\x9d\xac\x51\x1e\x17\xd9\x8a\xb\x42\x88\x16\x81\x37\x7c\x6a\xf7\xef\x2d\xe3\xd9\xf8\x5f\xe0\x53\x27\x74\xb9\xe2\xd6\x1c\x80\x2c\x52\x65",
+	.ilen	= 513,
+	.result	= "\xfd\x81\x8d\xd0\x3d\xb4\xd5\xdf\xd3\x42\x47\x5a\x6d\x19\x27\x66\x4b\x2e\xc\x27\x9c\x96\x4c\x72\x2\xa3\x65\xc3\xb3\x6f\x2e\xbd\x63\x8a\x4a\x5d\x29\xa2\xd0\x28\x48\xc5\x3d\x98\xa3\xbc\xe0\xbe\x3b\x3f\xe6\x8a\xa4\x7f\x53\x6\xfa\x7f\x27\x76\x72\x31\xa1\xf5\xd6\xc\x52\x47\xba\xcd\x4f\xd7\xeb\x5\x48\xd\x7c\x35\x4a\x9\xc9\x76\x71\x2\xa3\xfb\xb7\x1a\x65\xb7\xed\x98\xc6\x30\x8a\x0\xae\xa1\x31\xe5\xb5\x9e\x6d\x62\xda\xda\x7\xf\x38\x38\xd3\xcb\xc1\xb0\xad\xec\x72\xec\xb1\xa2\x7b\x59\xf3\x3d\x2b\xef\xcd\x28\x5b\x83\xcc\x18\x91\x88\xb0\x2e\xf9\x29\x31\x18\xf9\x4e\xe9\xa\x91\x92\x9f\xae\x2d\xad\xf4\xe6\x1a\xe2\xa4\xee\x47\x15\xbf\x83\x6e\xd7\x72\x12\x3b\x2d\x24\xe9\xb2\x55\xcb\x3c\x10\xf0\x24\x8a\x4a\x2\xea\x90\x25\xf0\xb4\x79\x3a\xef\x6e\xf5\x52\xdf\xb0\xa\xcd\x24\x1c\xd3\x2e\x22\x74\xea\x21\x6f\xe9\xbd\xc8\x3e\x36\x5b\x19\xf1\xca\x99\xa\xb4\xa7\x52\x1a\x4e\xf2\xad\x8d\x56\x85\xbb\x64\x89\xba\x26\xf9\xc7\xe1\x89\x19\x22\x77\xc3\xa8\xfc\xff\xad\xfe\xb9\x48\xae\x12\x30\x9f\x19\xfb\x1b\xef\x14\x87\x8a\x78\x71\xf3\xf4\xb7\x0\x9c\x1d\xb5\x3d\x49\x0\xc\x6\xd4\x50\xf9\x54\x45\xb2\x5b\x43\xdb\x6d\xcf\x1a\xe9\x7a\x7a\xcf\xfc\x8a\x4e\x4d\xb\x7\x63\x28\xd8\xe7\x8\x95\xdf\xa6\x72\x93\x2e\xbb\xa0\x42\x89\x16\xf1\xd9\xc\xf9\xa1\x16\xfd\xd9\x3\xb4\x3b\x8a\xf5\xf6\xe7\x6b\x2e\x8e\x4c\x3d\xe2\xaf\x8\x45\x3\xff\x9\xb6\xeb\x2d\xc6\x1b\x88\x94\xac\x3e\xf1\x9f\xe\xe\x2b\xd5\x0\x4d\x3f\x3b\x53\xae\xaf\x1c\x33\x5f\x55\x6e\x8d\xaf\x5\x7a\x10\x34\xc9\xf4\x66\xcb\x62\x12\xa6\xee\xe8\x1c\x5d\x12\x86\xdb\x6f\x1c\x33\xc4\x1c\xda\x82\x2d\x3b\x59\xfe\xb1\xa4\x59\x41\x86\xd0\xef\xae\xfb\xda\x6d\x11\xb8\xca\xe9\x6e\xff\xf7\xa9\xd9\x70\x30\xfc\x53\xe2\xd7\xa2\x4e\xc7\x91\xd9\x7\x6\xaa\xdd\xb0\x59\x28\x1d\x0\x66\xc5\x54\xc2\xfc\x6\xda\x5\x90\x52\x1d\x37\x66\xee\xf0\xb2\x55\x8a\x5d\xd2\x38\x86\x94\x9b\xfc\x10\x4c\xa1\xb9\x64\x3e\x44\xb8\x5f\xb0\xc\xec\xe0\xc9\xe5\x62\x75\x3f\x9\xd5\xf5\xd9\x26\xba\x9e\xd2\xf4\xb9\x48\xa\xbc\xa2\xd6\x7c\x36\x11\x7d\x26\x81\x89\xcf\xa4\xad\x73\xe\xee\xcc\x6\xa9\xdb\xb1\xfd\xfb\x9\x7f\x90\x42\x37\x2f\xe1\x9c\xf\x6f\xcf\x43\xb5\xd9\x90\xe1\x85\xf5\xa8\xae"
+}, {
+	.key	= "\x47\x11\xeb\x86\x2b\x2c\xab\x44\x34\xda\x7f\x57\x3\x39\xc\xaf\x2c\x14\xfd\x65\x23\xe9\x8e\x74\xd5\x8\x68\x8\xe7\xb4\x72\xd7",
+	.nonce	= "\xdb\x92\xf\x7f\x17\x54\xc\x30",
+	.assoc	= "\xd2\xa1\x70\xdb\x7a\xf8\xfa\x27\xba\x73\xf\xbf\x3d\x1e\x82\xb2",
+	.alen	= 16,
+	.input	= "\x42\x93\xe4\xeb\x97\xb0\x57\xbf\x1a\x8b\x1f\xe4\x5f\x36\x20\x3c\xef\xa\xa9\x48\x5f\x5f\x37\x22\x3a\xde\xe3\xae\xbe\xad\x7\xcc\xb1\xf6\xf5\xf9\x56\xdd\xe7\x16\x1e\x7f\xdf\x7a\x9e\x75\xb7\xc7\xbe\xbe\x8a\x36\x4\xc0\x10\xf4\x95\x20\x3\xec\xdc\x5\xa1\x7d\xc4\xa9\x2c\x82\xd0\xbc\x8b\xc5\xc7\x45\x50\xf6\xa2\x1a\xb5\x46\x3b\x73\x2\xa6\x83\x4b\x73\x82\x58\x5e\x3b\x65\x2f\xe\xfd\x2b\x59\x16\xce\xa1\x60\x9c\xe8\x3a\x99\xed\x8d\x5a\xcf\xf6\x83\xaf\xba\xd7\x73\x73\x40\x97\x3d\xca\xef\x7\x57\xe6\xd9\x70\xe\x95\xae\xa6\x8d\x4\xcc\xee\xf7\x9\x31\x77\x12\xa3\x23\x97\x62\xb3\x7b\x32\xfb\x80\x14\x48\x81\xc3\xe5\xea\x91\x39\x52\x81\xa2\x4f\xe4\xb3\x9\xff\xde\x5e\xe9\x58\x84\x6e\xf9\x3d\xdf\x25\xea\xad\xae\xe6\x9a\xd1\x89\x55\xd3\xde\x6c\x52\xdb\x70\xfe\x37\xce\x44\xa\xa8\x25\x5f\x92\xc1\x33\x4a\x4f\x9b\x62\x35\xff\xce\xc0\xa9\x60\xce\x52\x0\x97\x51\x35\x26\x2e\xb9\x36\xa9\x87\x6e\x1e\xcc\x91\x78\x53\x98\x86\x5b\x9c\x74\x7d\x88\x33\xe1\xdf\x37\x69\x2b\xbb\xf1\x4d\xf4\xd1\xf1\x39\x93\x17\x51\x19\xe3\x19\x1e\x76\x37\x25\xfb\x9\x27\x6a\xab\x67\x6f\x14\x12\x64\xe7\xc4\x7\xdf\x4d\x17\xbb\x6d\xe0\xe9\xb9\xab\xca\x10\x68\xaf\x7e\xb7\x33\x54\x73\x7\x6e\xf7\x81\x97\x9c\x5\x6f\x84\x5f\xd2\x42\xfb\x38\xcf\xd1\x2f\x14\x30\x88\x98\x4d\x5a\xa9\x76\xd5\x4f\x3e\x70\x6c\x85\x76\xd7\x1\xa0\x1a\xc8\x4e\xaa\xac\x78\xfe\x46\xde\x6a\x5\x46\xa7\x43\xc\xb9\xde\xb9\x68\xfb\xce\x42\x99\x7\x4d\xb\x3b\x5a\x30\x35\xa8\xf9\x3a\x73\xef\xf\xdb\x1e\x16\x42\xc4\xba\xae\x58\xaa\xf8\xe5\x75\x2f\x1b\x15\x5c\xfd\xa\x97\xd0\xe4\x37\x83\x61\x5f\x43\xa6\xc7\x3f\x38\x59\xe6\xeb\xa3\x90\xc3\xaa\xaa\x5a\xd3\x34\xd4\x17\xc8\x65\x3e\x57\xbc\x5e\xdd\x9e\xb7\xf0\x2e\x5b\xb2\x1f\x8a\x8\xd\x45\x91\xb\x29\x53\x4f\x4c\x5a\x73\x56\xfe\xaf\x41\x1\x39\xa\x24\x3c\x7e\xbe\x4e\x53\xf3\xeb\x6\x66\x51\x28\x1d\xbd\x41\xa\x1\xab\x16\x47\x27\x47\x47\xf7\xcb\x46\xa\x70\x9e\x1\x9c\x9\xe1\x2a\x0\x1a\xd8\xd4\x79\x9d\x80\x15\x8e\x53\x2a\x65\x83\x78\x3e\x3\x0\x7\x12\x1f\x33\x3e\x7b\x13\x37\xf1\xc3\xef\xb7\xc1\x20\x3c\x3e\x67\x66\x5d\x88\xa7\x7d\x33\x50\x77\xb0\x28\x8e\xe7\x2c\x2e\x7a\xf4\x3c\x8d\x74\x83\xaf\x8e\x87\xf\xe4\x50\xff\x84\x5c\x47\xc\x6a\x49\xbf\x42\x86\x77\x15\x48\xa5\x90\x5d\x93\xd6\x2a\x11\xd5\xd5\x11\xaa\xce\xe7\x6f\xa5\xb0\x9\x2c\x8d\xd3\x92\xf0\x5a\x2a\xda\x5b\x1e\xd5\x9a\xc4\xc4\xf3\x49\x74\x41\xca\xe8\xc1\xf8\x44\xd6\x3c\xae\x6c\x1d\x9a\x30\x4\x4d\x27\xe\xb1\x5f\x59\xa2\x24\xe8\xe1\x98\xc5\x6a\x4c\xfe\x41\xd2\x27\x42\x52\xe1\xe9\x7d\x62\xe4\x88\xf\xad\xb2\x70\xcb\x9d\x4c\x27\x2e\x76\x1e\x1a\x63\x65\xf5\x3b\xf8\x57\x69\xeb\x5b\x38\x26\x39\x33\x25\x45\x3e\x91\xb8\xd8\xc7\xd5\x42\xc0\x22\x31\x74\xf4\xbc\xc\x23\xf1\xca\xc1\x8d\xd7\xbe\xc9\x62\xe4\x8\x1a\xcf\x36\xd5\xfe\x55\x21\x59\x91\x87\x87\xdf\x6\xdb\xdf\x96\x45\x58\xda\x5\xcd\x50\x4d\xd2\x7d\x5\x18\x73\x6a\x8d\x11\x85\xa6\x88\xe8\xda\xe6\x30\x33\xa4\x89\x31\x75\xbe\x69\x43\x84\x43\x50\x87\xdd\x71\x36\x83\xc3\x78\x74\x24\xa\xed\x7b\xdb\xa4\x24\xb\xb9\x7e\x5d\xff\xde\xb1\xef\x61\x5a\x45\x33\xf6\x17\x7\x8\x98\x83\x92\xf\x23\x6d\xe6\xaa\x17\x54\xad\x6a\xc8\xdb\x26\xbe\xb8\xb6\x8\xfa\x68\xf1\xd7\x79\x6f\x18\xb4\x9e\x2d\x3f\x1b\x64\xaf\x8d\x6\xe\x49\x28\xe0\x5d\x45\x68\x13\x87\xfa\xde\x40\x7b\xd2\xc3\x94\xd5\xe1\xd9\xc2\xaf\x55\x89\xeb\xb4\x12\x59\xa8\xd4\xc5\x29\x66\x38\xe6\xac\x22\x22\xd9\x64\x9b\x34\xa\x32\x9f\xc2\xbf\x17\x6c\x3f\x71\x7a\x38\x6b\x98\xfb\x49\x36\x89\xc9\xe2\xd6\xc7\x5d\xd0\x69\x5f\x23\x35\xc9\x30\xe2\xfd\x44\x58\x39\xd7\x97\xfb\x5c\x0\xd5\x4f\x7a\x1a\x95\x8b\x62\x4b\xce\xe5\x91\x21\x7b\x30\x0\xd6\xdd\x6d\x2\x86\x49\xf\x3c\x1a\x27\x3c\xd3\xe\x71\xf2\xff\xf5\x2f\x87\xac\x67\x59\x81\xa3\xf7\xf8\xd6\x11\xc\x84\xa9\x3\xee\x2a\xc4\xf3\x22\xab\x7c\xe2\x25\xf5\x67\xa3\xe4\x11\xe0\x59\xb3\xca\x87\xa0\xae\xc9\xa6\x62\x1b\x6e\x4d\x2\x6b\x7\x9d\xfd\xd0\x92\x6\xe1\xb2\x9a\x4a\x1f\x1f\x13\x49\x99\x97\x8\xde\x7f\x98\xaf\x51\x98\xee\x2c\xcb\xf0\xb\xc6\xb6\xb7\x2d\x9a\xb1\xac\xa6\xe3\x15\x77\x9d\x6b\x1a\xe4\xfc\x8b\xf2\x17\x59\x8\x4\x58\x81\x9d\x1b\x1b\x69\x55\xc2\xb4\x3c\x1f\x50\xf1\x7f\x77\x90\x4c\x66\x40\x5a\xc0\x33\x1f\xcb\x5\x6d\x5c\x6\x87\x52\xa2\x8f\x26\xd5\x4f",
+	.ilen	= 1024,
+	.result	= "\xe5\x26\xa4\x3d\xbd\x33\xd0\x4b\x6f\x5\xa7\x6e\x12\x7a\xd2\x74\xa6\xdd\xbd\x95\xeb\xf9\xa4\xf1\x59\x93\x91\x70\xd9\xfe\x9a\xcd\x53\x1f\x3a\xab\xa6\x7c\x9f\xa6\x9e\xbd\x99\xd9\xb5\x97\x44\xd5\x14\x48\x4d\x9d\xc0\xd0\x5\x96\xeb\x4c\x78\x55\x9\x8\x1\x2\x30\x90\x7b\x96\x7a\x7b\x5f\x30\x41\x24\xce\x68\x61\x49\x86\x57\x82\xdd\x53\x1c\x51\x28\x2b\x53\x6e\x2d\xc2\x20\x4c\xdd\x8f\x65\x10\x20\x50\xdd\x9d\x50\xe5\x71\x40\x53\x69\xfc\x77\x48\x11\xb9\xde\xa4\x8d\x58\xe4\xa6\x1a\x18\x47\x81\x7e\xfc\xdd\xf6\xef\xce\x2f\x43\x68\xd6\x6\xe2\x74\x6a\xad\x90\xf5\x37\xf3\x3d\x82\x69\x40\xe9\x6b\xa7\x3d\xa8\x1e\xd2\x2\x7c\xb7\x9b\xe4\xda\x8f\x95\x6\xc5\xdf\x73\xa3\x20\x9a\x49\xde\x9c\xbc\xee\x14\x3f\x81\x5e\xf8\x3b\x59\x3c\xe1\x68\x12\x5a\x3a\x76\x3a\x3f\xf7\x87\x33\xa\x1\xb8\xd4\xed\xb6\xbe\x94\x5e\x70\x40\x56\x67\x1f\x50\x44\x19\xce\x82\x70\x10\x87\x13\x20\xb\x4c\x5a\xb6\xf6\xa7\xae\x81\x75\x1\x81\xe6\x4b\x57\x7c\xdd\x6d\xf8\x1c\x29\x32\xf7\xda\x3c\x2d\xf8\x9b\x25\x6e\x0\xb4\xf7\x2f\xf7\x4\xf7\xa1\x56\xac\x4f\x1a\x64\xb8\x47\x55\x18\x7b\x7\x4d\xbd\x47\x24\x80\x5d\xa2\x70\xc5\xdd\x8e\x82\xd4\xeb\xec\xb2\xc\x39\xd2\x97\xc1\xcb\xeb\xf4\x77\x59\xb4\x87\xef\xcb\x43\x2d\x46\x54\xd1\xa7\xd7\x15\x99\xa\x43\xa1\xe0\x99\x33\x71\xc1\xed\xfe\x72\x46\x33\x8e\x91\x8\x9f\xc8\x2e\xca\xfa\xdc\x59\xd5\xc3\x76\x84\x9f\xa3\x37\x68\xc3\xf0\x47\x2c\x68\xdb\x5e\xc3\x49\x4c\xe8\x92\x85\xe2\x23\xd3\x3f\xad\x32\xe5\x2b\x82\xd7\x8f\x99\xa\x59\x5c\x45\xd9\xb4\x51\x52\xc2\xae\xbf\x80\xcf\xc9\xc9\x51\x24\x2a\x3b\x3a\x4d\xae\xeb\xbd\x22\xc3\xe\xf\x59\x25\x92\x17\xe9\x74\xc7\x8b\x70\x70\x36\x55\x95\x75\x4b\xad\x61\x2b\x9\xbc\x82\xf2\x6e\x94\x43\xae\xc3\xd5\xcd\x8e\xfe\x5b\x9a\x88\x43\x1\x75\xb2\x23\x9\xf7\x89\x83\xe7\xfa\xf9\xb4\x9b\xf8\xef\xbd\x1c\x92\xc1\xda\x7e\xfe\x5\xba\x5a\xcd\x7\x6a\x78\x9e\x5d\xfb\x11\x2f\x79\x38\xb6\xc2\x5b\x6b\x51\xb4\x71\xdd\xf7\x2a\xe4\xf4\x72\x76\xad\xc2\xdd\x64\x5d\x79\xb6\xf5\x7a\x77\x20\x5\x3d\x30\x6\xd4\x4c\xa\x2c\x98\x5a\xb9\xd4\x98\xa9\x3f\xc6\x12\xea\x3b\x4b\xc5\x79\x64\x63\x6b\x9\x54\x3b\x14\x27\xba\x99\x80\xc8\x72\xa8\x12\x90\x29\xba\x40\x54\x97\x2b\x7b\xfe\xeb\xcd\x1\x5\x44\x72\xdb\x99\xe4\x61\xc9\x69\xd6\xb9\x28\xd1\x5\x3e\xf9\xb\x49\xa\x49\xe9\x8d\xe\xa7\x4a\xf\xaf\x32\xd0\xe0\xb2\x3a\x55\x58\xfe\x5c\x28\x70\x51\x23\xb0\x7b\x6a\x5f\x1e\xb8\x17\xd7\x94\x15\x8f\xee\x20\xc7\x42\x25\x3e\x9a\x14\xd7\x60\x72\x39\x47\x48\xa9\xfe\xdd\x47\xa\xb1\xe6\x60\x28\x8c\x11\x68\xe1\xff\xd7\xce\xc8\xbe\xb3\xfe\x27\x30\x9\x70\xd7\xfa\x2\x33\x3a\x61\x2e\xc7\xff\xa4\x2a\xa8\x6e\xb4\x79\x35\x6d\x4c\x1e\x38\xf8\xee\xd4\x84\x4e\x6e\x28\xa7\xce\xc8\xc1\xcf\x80\x5\xf3\x4\xef\xc8\x18\x28\x2e\x8d\x5e\xc\xdf\xb8\x5f\x96\xe8\xc6\x9c\x2f\xe5\xa6\x44\xd7\xe7\x99\x44\xc\xec\xd7\x5\x60\x97\xbb\x74\x77\x58\xd5\xbb\x48\xde\x5a\xb2\x54\x7f\xe\x46\x70\x6a\x6f\x78\xa5\x8\x89\x5\x4e\x7e\xa0\x69\xb4\x40\x60\x55\x77\x75\x9b\x19\xf2\xd5\x13\x80\x77\xf9\x4b\x3f\x1e\xee\xe6\x76\x84\x7b\x8c\xe5\x27\xa8\xa\x91\x1\x68\x71\x8a\x3f\x6\xab\xf6\xa9\xa5\xe6\x72\x92\xe4\x67\xe2\xa2\x46\x35\x84\x55\x7d\xca\xa8\x85\xd0\xf1\x3f\xbe\xd7\x34\x64\xfc\xae\xe3\xe4\x4\x9f\x66\x2\xb9\x88\x10\xd9\xc4\x4c\x31\x43\x7a\x93\xe2\x9b\x56\x43\x84\xdc\xdc\xde\x1d\xa4\x2\xe\xc2\xef\xc3\xf8\x78\xd1\xb2\x6b\x63\x18\xc9\xa9\xe5\x72\xd8\xf3\xb9\xd1\x8a\xc7\x1a\x2\x27\x20\x77\x10\xe5\xc8\xd4\x4a\x47\xe5\xdf\x5f\x1\xaa\xb0\xd4\x10\xbb\x69\xe3\x36\xc8\xe1\x3d\x43\xfb\x86\xcd\xcc\xbf\xf4\x88\xe0\x20\xca\xb7\x1b\xf1\x2f\x5c\xee\xd4\xd3\xa3\xcc\xa4\x1e\x1c\x47\xfb\xbf\xfc\xa2\x41\x55\x9d\xf6\x5a\x5e\x65\x32\x34\x7b\x52\x8d\xd5\xd0\x20\x60\x3\xab\x3f\x8c\xd4\x21\xea\x2a\xd9\xc4\xd0\xd3\x65\xd8\x7a\x13\x28\x62\x32\x4b\x2c\x87\x93\xa8\xb4\x52\x45\x9\x44\xec\xec\xc3\x17\xdb\x9a\x4d\x5c\xa9\x11\xd4\x7d\xaf\x9e\xf1\x2d\xb2\x66\xc5\x1d\xed\xb7\xcd\xb\x25\x5e\x30\x47\x3f\x40\xf4\xa1\xa0\x0\x94\x10\xc5\x6a\x63\x1a\xd5\x88\x92\x8e\x82\x39\x87\x3c\x78\x65\x58\x42\x75\x5b\xdd\x77\x3e\x9\x4e\x76\x5b\xe6\xe\x4d\x38\xb2\xc0\xb8\x95\x1\x7a\x10\xe0\xfb\x7\xf2\xab\x2d\x8c\x32\xed\x2b\xc0\x46\xc2\xf5\x38\x83\xf0\x17\xec\xc1\x20\x6a\x9a\xb\x0\xa0\x98\x22\x50\x23\xd5\x80\x6b\xf6\x1f\xc3\xcc\x97\xc9\x24\x9f\xf3\xaf\x43\x14\xd5\xa0"
+}, {
+	.key	= "\x35\x4e\xb5\x70\x50\x42\x8a\x85\xf2\xfb\xed\x7b\xd0\x9e\x97\xca\xfa\x98\x66\x63\xee\x37\xcc\x52\xfe\xd1\xdf\x95\x15\x34\x29\x38",
+	.nonce	= "\xfd\x87\xd4\xd8\x62\xfd\xec\xaa",
+	.assoc	= "\xd6\x31\xda\x5d\x42\x5e\xd7",
+	.alen	= 7,
+	.input	= "\x7a\x57\xf2\xc7\x6\x3f\x50\x7b\x36\x1a\x66\x5c\xb9\xe\x5e\x3b\x45\x60\xbe\x9a\x31\x9f\xff\x5d\x66\x34\xb4\xdc\xfb\x9d\x8e\xee\x6a\x33\xa4\x7\x3c\xf9\x4c\x30\xa1\x24\x52\xf9\x50\x46\x88\x20\x2\x32\x3a\xe\x99\x63\xaf\x1f\x15\x28\x2a\x5\xff\x57\x59\x5e\x18\xa1\x1f\xd0\x92\x5c\x88\x66\x1b\x0\x64\xa5\x93\x8d\x6\x46\xb0\x64\x8b\x8b\xef\x99\x5\x35\x85\xb3\xf3\x33\xbb\xec\x66\xb6\x3d\x57\x42\xe3\xb4\xc6\xaa\xb0\x41\x2a\xb9\x59\xa9\xf6\x3e\x15\x26\x12\x3\x21\x4c\x74\x43\x13\x2a\x3\x27\x9\xb4\xfb\xe7\xb7\x40\xff\x5e\xce\x48\x9a\x60\xe3\x8b\x80\x8c\x38\x2d\xcb\x93\x37\x74\x5\x52\x6f\x73\x3e\xc3\xbc\xca\x72\xa\xeb\xf1\x3b\xa0\x95\xdc\x8a\xc4\xa9\xdc\xca\x44\xd8\x8\x63\x6a\x36\xd3\x3c\xb8\xac\x46\x7d\xfd\xaa\xeb\x3e\xf\x45\x8f\x49\xda\x2b\xf2\x12\xbd\xaf\x67\x8a\x63\x48\x4b\x55\x5f\x6d\x8c\xb9\x76\x34\x84\xae\xc2\xfc\x52\x64\x82\xf7\xb0\x6\xf0\x45\x73\x12\x50\x30\x72\xea\x78\x9a\xa8\xaf\xb5\xe3\xbb\x77\x52\xec\x59\x84\xbf\x6b\x8f\xce\x86\x5e\x1f\x23\xe9\xfb\x8\x86\xf7\x10\xb9\xf2\x44\x96\x44\x63\xa9\xa8\x78\x0\x23\xd6\xc7\xe7\x6e\x66\x4f\xcc\xee\x15\xb3\xbd\x1d\xa0\xe5\x9c\x1b\x24\x2c\x4d\x3c\x62\x35\x9c\x88\x59\x9\xdd\x82\x1b\xcf\xa\x83\x6b\x3f\xae\x3\xc4\xb4\xdd\x7e\x5b\x28\x76\x25\x96\xd9\xc9\x9d\x5f\x86\xfa\xf6\xd7\xd2\xe6\x76\x1d\xf\xa1\xdc\x74\x5\x1b\x1d\xe0\xcd\x16\xb0\xa8\x8a\x34\x7b\x15\x11\x77\xe5\x7b\x7e\x20\xf7\xda\x38\xda\xce\x70\xe9\xf5\x6c\xd9\xbe\xc\x4c\x95\x4c\xc2\x9b\x34\x55\x55\xe1\xf3\x46\x8e\x48\x74\x14\x4f\x9d\xc9\xf5\xe8\x1a\xf0\x11\x4a\xc1\x8d\xe0\x93\xa0\xbe\x9\x1c\x2b\x4e\xf\xb2\x87\x8b\x84\xfe\x92\x32\x14\xd7\x93\xdf\xe7\x44\xbc\xc5\xae\x53\x69\xd8\xb3\x79\x37\x80\xe3\x17\x5c\xec\x53\x0\x9a\xe3\x8e\xdc\x38\xb8\x66\xf0\xd3\xad\x1d\x2\x96\x86\x3e\x9d\x3b\x5d\xa5\x7f\x21\x10\xf1\x1f\x13\x20\xf9\x57\x87\x20\xf5\x5f\xf1\x17\x48\xa\x51\x5a\xcd\x19\x3\xa6\x5a\xd1\x12\x97\xe9\x48\xe2\x1d\x83\x75\x50\xd9\x75\x7d\x6a\x82\xa1\xf9\x4e\x54\x87\x89\xc9\xc\xb7\x5b\x6a\x91\xc1\x9c\xb2\xa9\xdc\x9a\xa4\x49\xa\x6d\xd\xbb\xde\x86\x44\xdd\x5d\x89\x2b\x96\xf\x23\x95\xad\xcc\xa2\xb3\xb9\x7e\x74\x38\xba\x9f\x73\xae\x5f\xf8\x68\xa2\xe0\xa9\xce\xbd\x40\xd4\x4c\x6b\xd2\x56\x62\xb0\xcc\x63\x7e\x5b\xd3\xae\xd1\x75\xce\xbb\xb4\x5b\xa8\xf8\xb4\xac\x71\x75\xaa\xc9\x9f\xbb\x6c\xad\xf\x55\x5d\xe8\x85\x7d\xf9\x21\x35\xea\x92\x85\x2b\x0\xec\x84\x90\xa\x63\x96\xe4\x6b\xa9\x77\xb8\x91\xf8\x46\x15\x72\x63\x70\x1\x40\xa3\xa5\x76\x62\x2b\xbf\xf1\xe5\x8d\x9f\xa3\xfa\x9b\x3\xbe\xfe\x65\x6f\xa2\x29\xd\x54\xb4\x71\xce\xa9\xd6\x3d\x88\xf9\xaf\x6b\xa8\x9e\xf4\x16\x96\x36\xb9\x0\xdc\x10\xab\xb5\x8\x31\x1f\x0\xb1\x3c\xd9\x38\x3e\xc6\x4\xa7\x4e\xe8\xae\xed\x98\xc2\xf7\xb9\x0\x5f\x8c\x60\xd1\xe5\x15\xf7\xae\x1e\x84\x88\xd1\xf6\xbc\x3a\x89\x35\x22\x83\x7c\xca\xf0\x33\x82\x4c\x79\x3c\xfd\xb1\xae\x52\x62\x55\xd2\x41\x60\xc6\xbb\xfa\xe\x59\xd6\xa8\xfe\x5d\xed\x47\x3d\xe0\xea\x1f\x6e\x43\x51\xec\x10\x52\x56\x77\x42\x6b\x52\x87\xd8\xec\xe0\xaa\x76\xa5\x84\x2a\x22\x24\xfd\x92\x40\x88\xd5\x85\x1c\x1f\x6b\x47\xa0\xc4\xe4\xef\xf4\xea\xd7\x59\xac\x2a\x9e\x8c\xfa\x1f\x42\x8\xfe\x4f\x74\xa0\x26\xf5\xb3\x84\xf6\x58\x5f\x26\x66\x3e\xd7\xe4\x22\x91\x13\xc8\xac\x25\x96\x23\xd8\x9\xea\x45\x75\x23\xb8\x5f\xc2\x90\x8b\x9\xc4\xfc\x47\x6c\x6d\xa\xef\x69\xa4\x38\x19\xcf\x7d\xf9\x9\x73\x9b\x60\x5a\xf7\x37\xb5\xfe\x9f\xe3\x2b\x4c\xd\x6e\x19\xf1\xd6\xc0\x70\xf3\x9d\x22\x3c\xf9\x49\xce\x30\x8e\x44\xb5\x76\x15\x8f\x52\xfd\xa5\x4\xb8\x55\x6a\x36\x59\x7c\xc4\x48\xb8\xd7\xab\x5\x66\xe9\x5e\x21\x6f\x6b\x36\x29\xbb\xe9\xe3\xa2\x9a\xa8\xcd\x55\x25\x11\xba\x5a\x58\xa0\xde\xae\x19\x2a\x48\x5a\xff\x36\xcd\x6d\x16\x7a\x73\x38\x46\xe5\x47\x59\xc8\xa2\xf6\xe2\x6c\x83\xc5\x36\x2c\x83\x7d\xb4\x1\x5\x69\xe7\xaf\x5c\xc4\x64\x82\x12\x21\xef\xf7\xd1\x7d\xb8\x8d\x8c\x98\x7c\x5f\x7d\x92\x88\xb9\x94\x7\x9c\xd8\xe9\x9c\x17\x38\xe3\x57\x6c\xe0\xdc\xa5\x92\x42\xb3\xbd\x50\xa2\x7e\xb5\xb1\x52\x72\x3\x97\xd8\xaa\x9a\x1e\x75\x41\x11\xa3\x4f\xcc\xd4\xe3\x73\xad\x96\xdc\x47\x41\x9f\xb0\xbe\x79\x91\xf5\xb6\x18\xfe\xc2\x83\x18\x7d\x73\xd9\x4f\x83\x84\x3\xb3\xf0\x77\x66\x3d\x83\x63\x2e\x2c\xf9\xdd\xa6\x1f\x89\x82\xb8\x23\x42\xeb\xe2\xca\x70\x82\x61\x41\xa\x6d\x5f\x75\xc5\xe2\xc4\x91\x18\x44\x22\xfa\x34\x10\xf5\x20\xdc\xb7\xdd\x2a\x20\x77\xf5\xf9\xce\xdb\xa0\xa\x52\x2a\x4e\xdd\xcc\x97\xdf\x5\xe4\x5e\xb7\xaa\xf0\xe2\x80\xff\xba\x1a\xf\xac\xdf\x2\x32\xe6\xf7\xc7\x17\x13\xb7\xfc\x98\x48\x8c\xd\x82\xc9\x80\x7a\xe2\xa\xc5\xb4\xde\x7c\x3c\x79\x81\xe\x28\x65\x79\x67\x82\x69\x44\x66\x9\xf7\x16\x1a\xf9\x7d\x80\xa1\x79\x14\xa9\xc8\x20\xfb\xa2\x46\xbe\x8\x35\x17\x58\xc1\x1a\xda\x2a\x6b\x2e\x1e\xe6\x27\x55\x7b\x19\xe2\xfb\x64\xfc\x5e\x15\x54\x3c\xe7\xc2\x11\x50\x30\xb8\x72\x3\xb\x1a\x9f\x86\x27\x11\x5c\x6\x2b\xbd\x75\x1a\xa\xda\x1\xfa\x5c\x4a\xc1\x80\x3a\x6e\x30\xc8\x2c\xeb\x56\xec\x89\xfa\x35\x7b\xb2\xf0\x97\x8\x86\x53\xbe\xbd\x40\x41\x38\x1c\xb4\x8b\x79\x2e\x18\x96\x94\xde\xe8\xca\xe5\x9f\x92\x9f\x15\x5d\x56\x60\x5c\x9\xf9\x16\xf4\x17\xf\xf6\x4c\xda\xe6\x67\x89\x9f\xca\x6c\xe7\x9b\x4\x62\xe\x26\xa6\x52\xbd\x29\xff\xc7\xa4\x96\xe6\x6a\x2\xa5\x2e\x7b\xfe\x97\x68\x3e\x2e\x5f\x3b\xf\x36\xd6\x98\x19\x59\x48\xd2\xc6\xe1\x55\x1a\x6e\xd6\xed\x2c\xba\xc3\x9e\x64\xc9\x95\x86\x35\x5e\x3e\x88\x69\x99\x4b\xee\xbe\x9a\x99\xb5\x6e\x58\xae\xdd\x22\xdb\xdd\x6b\xfc\xaf\x90\xa3\x3d\xa4\xc1\x15\x92\x18\x8d\xd2\x4b\x7b\x6\xd1\x37\xb5\xe2\x7c\x2c\xf0\x25\xe4\x94\x2a\xbd\xe3\x82\x70\x78\xa3\x82\x10\x5a\x90\xd7\xa4\xfa\xaf\x1a\x88\x59\xdc\x74\x12\xb4\x8e\xd7\x19\x46\xf4\x84\x69\x9f\xbb\x70\xa8\x4c\x52\x81\xa9\xff\x76\x1c\xae\xd8\x11\x3d\x7f\x7d\xc5\x12\x59\x28\x18\xc2\xa2\xb7\x1c\x88\xf8\xd6\x1b\xa6\x7d\x9e\xde\x29\xf8\xed\xff\xeb\x92\x24\x4f\x5\xaa\xd9\x49\xba\x87\x59\x51\xc9\x20\x5c\x9b\x74\xcf\x3\xd9\x2d\x34\xc7\x5b\xa5\x40\xb2\x99\xf5\xcb\xb4\xf6\xb7\x72\x4a\xd6\xbd\xb0\xf3\x93\xe0\x1b\xa8\x4\x1e\x35\xd4\x80\x20\xf4\x9c\x31\x6b\x45\xb9\x15\xb0\x5e\xdd\xa\x33\x9c\x83\xcd\x58\x89\x50\x56\xbb\x81\x0\x91\x32\xf3\x1b\x3e\xcf\x45\xe1\xf9\xe1\x2c\x26\x78\x93\x9a\x60\x46\xc9\xb5\x5e\x6a\x28\x92\x87\x3f\x63\x7b\xdb\xf7\xd0\x13\x9d\x32\x40\x5e\xcf\xfb\x79\x68\x47\x4c\xfd\x1\x17\xe6\x97\x93\x78\xbb\xa6\x27\xa3\xe8\x1a\xe8\x94\x55\x7d\x8\xe5\xdc\x66\xa3\x69\xc8\xca\xc5\xa1\x84\x55\xde\x8\x91\x16\x3a\xc\x86\xab\x27\x2b\x64\x34\x2\x6c\x76\x8b\xc6\xaf\xcc\xe1\xd6\x8c\x2a\x18\x3d\xa6\x1b\x37\x75\x45\x73\xc2\x75\xd7\x53\x78\x3a\xd6\xe8\x29\xd2\x4a\xa8\x1e\x82\xf6\xb6\x81\xde\x21\xed\x2b\x56\xbb\xf2\xd0\x57\xc1\x7c\xd2\x6a\xd2\x56\xf5\x13\x5f\x1c\x6a\xb\x74\xfb\xe9\xfe\x9e\xea\x95\xb2\x46\xab\xa\xfc\xfd\xf3\xbb\x4\x2b\x76\x1b\xa4\x74\xb0\xc1\x78\xc3\x69\xe2\xb0\x1\xe1\xde\x32\x4c\x8d\x1a\xb3\x38\x8\xd5\xfc\x1f\xdc\xe\x2c\x9c\xb1\xa1\x63\x17\x22\xf5\x6c\x93\x70\x74\x0\xf8\x39\x1\x94\xd1\x32\x23\x56\x5d\xa6\x2\x76\x76\x93\xce\x2f\x19\xe9\x17\x52\xae\x6e\x2c\x6d\x61\x7f\x3b\xaa\xe0\x52\x85\xc5\x65\xc1\xbb\x8e\x5b\x21\xd5\xc9\x78\x83\x7\x97\x4c\x62\x61\x41\xd4\xfc\xc9\x39\xe3\x9b\xd0\xcc\x75\xc4\x97\xe6\xdd\x2a\x5f\xa6\xe8\x59\x6c\x98\xb9\x2\xe2\xa2\xd6\x68\xee\x3b\x1d\xe3\x4d\x5b\x30\xef\x3\xf2\xeb\x18\x57\x36\xe8\xa1\xf4\x47\xfb\xcb\x8f\xcb\xc8\xf3\x4f\x74\x9d\x9d\xb1\x8d\x14\x44\xd9\x19\xb4\x54\x4f\x75\x19\x9\xa0\x75\xbc\x3b\x82\xc6\x3f\xb8\x83\x19\x6e\xd6\x37\xfe\x6e\x8a\x4e\xe0\x4a\xab\x7b\xc8\xb4\x1d\xf4\xed\x27\x3\x65\xa2\xa1\xae\x11\xe7\x98\x78\x48\x91\xd2\xd2\xd4\x23\x78\x50\xb1\x5b\x85\x10\x8d\xca\x5f\xf\x71\xae\x72\x9a\xf6\x25\x19\x60\x6\xf7\x10\x34\x18\xd\xc9\x9f\x7b\xc\x9b\x8f\x91\x1b\x9f\xcd\x10\xee\x75\xf9\x97\x66\xfc\x4d\x33\x6e\x28\x2b\x92\x85\x4f\xab\x43\x8d\x8f\x7d\x86\xa7\xc7\xd8\xd3\xb\x8b\x57\xb6\x1d\x95\xd\xe9\xbc\xd9\x3\xd9\x10\x19\xc3\x46\x63\x55\x87\x61\x79\x6c\x95\xe\x9c\xdd\xca\xc3\xf3\x64\xf0\x7d\x76\xb7\x53\x67\x2b\x1e\x44\x56\x81\xea\x8f\x5c\x42\x16\xb8\x28\xeb\x1b\x61\x10\x1e\xbf\xec\xa8",
+	.ilen	= 1933,
+	.result	= "\x6a\xfc\x4b\x25\xdf\xc0\xe4\xe8\x17\x4d\x4c\xc9\x7e\xde\x3a\xcc\x3c\xba\x6a\x77\x47\xdb\xe3\x74\x7a\x4d\x5f\x8d\x37\x55\x80\x73\x90\x66\x5d\x3a\x7d\x5d\x86\x5e\x8d\xfd\x83\xff\x4e\x74\x6f\xf9\xe6\x70\x17\x70\x3e\x96\xa7\x7e\xcb\xab\x8f\x58\x24\x9b\x1\xfd\xcb\xe6\x4d\x9b\xf0\x88\x94\x57\x66\xef\x72\x4c\x42\x6e\x16\x19\x15\xea\x70\x5b\xac\x13\xdb\x9f\x18\xe2\x3c\x26\x97\xbc\xdc\x45\x8c\x6c\x24\x69\x9c\xf7\x65\x1e\x18\x59\x31\x7c\xe4\x73\xbc\x39\x62\xc6\x5c\x9f\xbf\xfa\x90\x3\xc9\x72\x26\xb6\x1b\xc2\xb7\x3f\xf2\x13\x77\xf2\x8d\xb9\x47\xd0\x53\xdd\xc8\x91\x83\x8b\xb1\xce\xa3\xfe\xcd\xd9\xdd\x92\x7b\xdb\xb8\xfb\xc9\x2d\x1\x59\x39\x52\xad\x1b\xec\xcf\xd7\x70\x13\x21\xf5\x47\xaa\x18\x21\x5c\xc9\x9a\xd2\x6b\x5\x9c\x1\xa1\xda\x35\x5d\xb3\x70\xe6\xa9\x80\x8b\x91\xb7\xb3\x5f\x24\x9a\xb7\xd1\x6b\xa1\x1c\x50\xba\x49\xe0\xee\x2e\x75\xac\x69\xc0\xeb\x3\xdd\x19\xe5\xf6\x6\xdd\xc3\xd7\x2b\x7\x7\x30\xa7\x19\xc\xbf\xe6\x18\xcc\xb1\x1\x11\x85\x77\x1d\x96\xa7\xa3\x0\x84\x2\xa2\x83\x68\xda\x17\x27\xc8\x7f\x23\xb7\xf4\x13\x85\xcf\xdd\x7a\x7d\x24\x57\xfe\x5\x93\xf5\x74\xce\xed\xc\x20\x98\x8d\x92\x30\xa1\x29\x23\x1a\xa0\x4f\x69\x56\x4c\xe1\xc8\xce\xf6\x9a\xc\xa4\xfa\x4\xf6\x62\x95\xf2\xfa\xc7\x40\x68\x40\x8f\x41\xda\xb4\x26\x6f\x70\xab\x40\x61\xa4\xe\x75\xfb\x86\xeb\x9d\x9a\x1f\xec\x76\x99\xe7\xea\xaa\x1e\x2d\xb5\xd4\xa6\x1a\xb8\x61\xa\x1d\x16\x5b\x98\xc2\x31\x40\xe7\x23\x1d\x66\x99\xc8\xc0\xd7\xce\xf3\x57\x40\x4\x3f\xfc\xea\xb3\xfc\xd2\xd3\x99\xa4\x94\x69\xa0\xef\xd1\x85\xb3\xa6\xb1\x28\xbf\x94\x67\x22\xc3\x36\x46\xf8\xd2\xf\x5f\xf4\x59\x80\xe6\x2d\x43\x8\x7d\x19\x9\x97\xa7\x4c\x3d\x8d\xba\x65\x62\xa3\x71\x33\x29\x62\xdb\xc1\x33\x34\x1a\x63\x33\x16\xb6\x64\x7e\xab\x33\xf0\xe6\x26\x68\xba\x1d\x2e\x38\x8\xe6\x2\xd3\x25\x2c\x47\x23\x58\x34\xf\x9d\x63\x4f\x63\xbb\x7f\x3b\x34\x38\xa7\xb5\x8d\x65\xd9\x9f\x79\x55\x3e\x4d\xe7\x73\xd8\xf6\x98\x97\x84\x60\x9c\xc8\xa9\x3c\xf6\xdc\x12\x5c\xe1\xbb\xb\x8b\x98\x9c\x9d\x26\x7c\x4a\xe6\x46\x36\x58\x21\x4a\xee\xca\xd7\x3b\xc2\x6c\x49\x2f\xe5\xd5\x3\x59\x84\x53\xcb\xfe\x92\x71\x2e\x7c\x21\xcc\x99\x85\x7f\xb8\x74\x90\x13\x42\x3f\xe0\x6b\x1d\xf2\x4d\x54\xd4\xfc\x3a\x5\xe6\x74\xaf\xa6\xa0\x2a\x20\x23\x5d\x34\x5c\xd9\x3e\x4e\xfa\x93\xe7\xaa\xe9\x6f\x8\x43\x67\x41\xc5\xad\xfb\x31\x95\x82\x73\x32\xd8\xa6\xa3\xed\xe\x2d\xf6\x5f\xfd\x80\xa6\x7a\xe0\xdf\x78\x15\x29\x74\x33\xd0\x9e\x83\x86\x72\x22\x57\x29\xb9\x9e\x5d\xd3\x1a\xb5\x96\x72\x41\x3d\xf1\x64\x43\x67\xee\xaa\x5c\xd3\x9a\x96\x13\x11\x5d\xf3\xc\x87\x82\x1e\x41\x9e\xd0\x27\xd7\x54\x3b\x67\x73\x9\x91\xe9\xd5\x36\xa7\xb5\x55\xe4\xf3\x21\x51\x49\x22\x7\x55\x4f\x44\x4b\xd2\x15\x93\x17\x2a\xfa\x4d\x4a\x57\xdb\x4c\xa6\xeb\xec\x53\x25\x6c\x21\xed\x0\x4c\x3b\xca\x14\x57\xa9\xd6\x6a\xcd\x8d\x5e\x74\xac\x72\xc1\x97\xe5\x1b\x45\x4e\xda\xfc\xcc\x40\xe8\x48\x88\xb\xa3\xe3\x8d\x83\x42\xc3\x23\xfd\x68\xb5\x8e\xf1\x9d\x63\x77\xe9\xa3\x8e\x8c\x26\x6b\xbd\x72\x73\x35\xc\x3\xf8\x43\x78\x52\x71\x15\x1f\x71\x5d\x6e\xed\xb9\xcc\x86\x30\xdb\x2b\xd3\x82\x88\x23\x71\x90\x53\x5c\xa9\x2f\x76\x1\xb7\x9a\xfe\x43\x55\xa3\x4\x9b\xe\xe4\x59\xdf\xc9\xe9\xb1\xea\x29\x28\x3c\x5c\xae\x72\x84\xb6\xc6\xeb\xc\x27\x7\x74\x90\xd\x31\xb0\x0\x77\xe9\x40\x70\x6f\x68\xa7\xfd\x6\xec\x4b\xc0\xb7\xac\xbc\x33\xb7\x6d\xa\xbd\x12\x1b\x59\xcb\xdd\x32\xf5\x1d\x94\x57\x76\x9e\xc\x18\x98\x71\xd7\x2a\xdb\xb\x7b\xa7\x71\xb7\x67\x81\x23\x96\xae\xb9\x7e\x32\x43\x92\x8a\x19\xa0\xc4\xd4\x3b\x57\xf9\x4a\x2c\xfb\x51\x46\xbb\xcb\x5d\xb3\xef\x13\x93\x6e\x68\x42\x54\x57\xd3\x6a\x3a\x8f\x9d\x66\xbf\xbd\x36\x23\xf5\x93\x83\x7b\x9c\xc0\xdd\xc5\x49\xc0\x64\xed\x7\x12\xb3\xe6\xe4\xe5\x38\x95\x23\xb1\xa0\x3b\x1a\x61\xda\x17\xac\xc3\x58\xdd\x74\x64\x22\x11\xe8\x32\x1d\x16\x93\x85\x99\xa5\x9c\x34\x55\xb1\xe9\x20\x72\xc9\x28\x7b\x79\x0\xa1\xa6\xa3\x27\x40\x18\x8a\x54\xe0\xcc\xe8\x4e\x8e\x43\x96\xe7\x3f\xc8\xe9\xb2\xf9\xc9\xda\x4\x71\x50\x47\xe4\xaa\xce\xa2\x30\xc8\xe4\xac\xc7\xd\x6\x2e\xe6\xe8\x80\x36\x29\x9e\x1\xb8\xc3\xf0\xa0\x5d\x7a\xca\x4d\xa0\x57\xbd\x2a\x45\xa7\x7f\x9c\x93\x7\x8f\x35\x67\x92\xe3\xe9\x7f\xa8\x61\x43\x9e\x25\x4f\x33\x76\x13\x6e\x12\xb9\xdd\xa4\x7c\x8\x9f\x7c\xe7\xa\x8d\x84\x6\xa4\x33\x17\x34\x5e\x10\x7c\xc0\xa8\x3d\x1f\x42\x20\x51\x65\x5d\x9\xc3\xaa\xc0\xc8\xd\xf0\x79\xbc\x20\x1b\x95\xe7\x6\x7d\x47\x20\x3\x1a\x74\xdd\xe2\xd4\xae\x38\x71\x9b\xf5\x80\xec\x8\x4e\x56\xba\x76\x12\x1a\xdf\x48\xf3\xae\xb3\xe6\xe6\xbe\xc0\x91\x2e\x1\xb3\x1\x86\xa2\xb9\x52\xd1\x21\xae\xd4\x97\x1d\xef\x41\x12\x95\x3d\x48\x45\x1c\x56\x32\x8f\xb8\x43\xbb\x19\xf3\xca\xe9\xeb\x6d\x84\xbe\x86\x6\xe2\x36\xb2\x62\x9d\xd3\x4c\x48\x18\x54\x13\x4e\xcf\xfd\xba\x84\xb9\x30\x53\xcf\xfb\xb9\x29\x8f\xdc\x9f\xef\x60\xb\x64\xf6\x8b\xee\xa6\x91\xc2\x41\x6c\xf6\xfa\x79\x67\x4b\xc1\x3f\xaf\x9\x81\xd4\x5d\xcb\x9\xdf\x36\x31\xc0\x14\x3c\x7c\xe\x65\x95\x99\x6d\xa3\xf4\xd7\x38\xee\x1a\x2b\x37\xe2\xa4\x3b\x4b\xd0\x65\xca\xf8\xc3\xe8\x15\x20\xef\xf2\x0\xfd\x1\x9\xc5\xc8\x17\x4\x93\xd0\x93\x3\x55\xc5\xfe\x32\xa3\x3e\x28\x2d\x3b\x93\x8a\xcc\x7\x72\x80\x8b\x74\x16\x24\xbb\xda\x94\x39\x30\x8f\xb1\xcd\x4a\x90\x92\x7c\x14\x8f\x95\x4e\xac\x9b\xd8\x8f\x1a\x87\xa4\x32\x27\x8a\xba\xf7\x41\xcf\x84\x37\x19\xe6\x6\xf5\xe\xcf\x36\xf5\x9e\x6c\xde\xbc\xff\x64\x7e\x4e\x59\x57\x48\xfe\x14\xf7\x9c\x93\x5d\x15\xad\xcc\x11\xb1\x17\x18\xb2\x7e\xcc\xab\xe9\xce\x7d\x77\x5b\x51\x1b\x1e\x20\xa8\x32\x6\xe\x75\x93\xac\xdb\x35\x37\x1f\xe9\x19\x1d\xb4\x71\x97\xd6\x4e\x2c\x8\xa5\x13\xf9\xe\x7e\x78\x6e\x14\xe0\xa9\xb9\x96\x4c\x80\x82\xba\x17\xb3\x9d\x69\xb0\x84\x46\xff\xf9\x52\x79\x94\x58\x3a\x62\x90\x15\x35\x71\x10\x37\xed\xa1\x8e\x53\x6e\xf4\x26\x57\x93\x15\x93\xf6\x81\x2c\x5a\x10\xda\x92\xad\x2f\xdb\x28\x31\x2d\x55\x4\xd2\x6\x28\x8c\x1e\xdc\xea\x54\xac\xff\xb7\x6c\x30\x15\xd4\xb4\xd\x0\x93\x57\xdd\xd2\x7\x7\x6\xd9\x43\x9b\xcd\x3a\xf4\x7d\x4c\x36\x5d\x23\xa2\xcc\x57\x40\x91\xe9\x2c\x2f\x2c\xd5\x30\x9b\x17\xb0\xc9\xf7\xa7\x2f\xd1\x93\x20\x6b\xc6\xc1\xe4\x6f\xcb\xd1\xe7\x9\xf\x9e\xdc\xaa\x9f\x2f\xdf\x56\x9f\xd4\x33\x4\xaf\xd3\x6c\x58\x61\xf0\x30\xec\xf2\x7f\xf2\x9c\xdf\x39\xbb\x6f\xa2\x8c\x7e\xc4\x22\x51\x71\xc0\x4d\x14\x1a\xc4\xcd\x4\xd9\x87\x8\x50\x5\xcc\xaf\xf6\xf0\x8f\x92\x54\x58\xc2\xc7\x9\x7a\x59\x2\x5\xe8\xb0\x86\xd9\xbf\x7b\x35\x51\x4d\xaf\x8\x97\x2c\x65\xda\x2a\x71\x3a\xa8\x51\xcc\xf2\x73\x27\xc3\xfd\x62\xcf\xe3\xb2\xca\xcb\xbe\x1a\xa\xa1\x34\x7b\x77\xc4\x62\x68\x78\x5f\x94\x7\x4\x65\x16\x4b\x61\xcb\xff\x75\x26\x50\x66\x1f\x6e\x93\xf8\xc5\x51\xeb\xa4\x4a\x48\x68\x6b\xe2\x5e\x44\xb2\x50\x2c\x6c\xae\x79\x4e\x66\x35\x81\x50\xac\xbc\x3f\xb1\xc\xf3\x5\x3c\x4a\xa3\x6c\x2a\x79\xb4\xb7\xab\xca\xc7\x9b\x8e\xcd\x5f\x11\x3\xcb\x30\xa3\xab\xda\xfe\x64\xb9\xbb\xd8\x5e\x3a\x1a\x56\xe5\x5\x48\x90\x1e\x61\x69\x1b\x22\xe6\x1a\x3c\x75\xad\x1f\x37\x28\xdc\xe4\x6d\xbd\x42\xdc\xd3\xc8\xb6\x1c\x48\xfe\x94\x77\x7f\xbd\x62\xac\xa3\x47\x27\xcf\x5f\xd9\xdb\xaf\xec\xf7\x5e\xc1\xb0\x9d\x1\x26\x99\x7e\x8f\x3\x70\xb5\x42\xbe\x67\x28\x1b\x7c\xbd\x61\x21\x97\xcc\x5c\xe1\x97\x8f\x8d\xde\x2b\xaa\xa7\x71\x1d\x1e\x2\x73\x70\x58\x32\x5b\x1d\x67\x3d\xe0\x74\x4f\x3\xf2\x70\x51\x79\xf1\x61\x70\x15\x74\x9d\x23\x89\xde\xac\xfd\xde\xd0\x1f\xc3\x87\x44\x35\x4b\xe5\xb0\x60\xc5\x22\xe4\x9e\xca\xeb\xd5\x3a\x9\x45\xa4\xdb\xfa\x3f\xeb\x1b\xc7\xc8\x14\x99\x51\x92\x10\xed\xed\x28\xe0\xa1\xf8\x26\xcf\xcd\xcb\x63\xa1\x3b\xe3\xdf\x7e\xfe\xa6\xf0\x81\x9a\xbf\x55\xde\x54\xd5\x56\x60\x98\x10\x68\xf4\x38\x96\x8e\x6f\x1d\x44\x7f\xd6\x2f\xfe\x55\xfb\xc\x7e\x67\xe2\x61\x44\xed\xf2\x35\x30\x5d\xe9\xc7\xd6\x6d\xe0\xa0\xed\xf3\xfc\xd8\x3e\xa\x7b\xcd\xaf\x65\x68\x18\xc0\xec\x4\x1c\x74\x6d\xe2\x6e\x79\xd4\x11\x2b\x62\xd5\x27\xad\x4f\x1\x59\x73\xcc\x6a\x53\xfb\x2d\xd5\x4e\x99\x21\x65\x4d\xf5\x82\xf7\xd8\x42\xce\x6f\x3d\x36\x47\xf1\x5\x16\xe8\x1b\x6a\x8f\x93\xf2\x8f\x37\x40\x12\x28\xa3\xe6\xb9\x17\x4a\x1f\xb1\xd1\x66\x69\x86\xc4\xfc\x97\xae\x3f\x8f\x1e\x2b\xdf\xcd\xf9\x3c"
+}, {
+	.key	= "\xb3\x35\x50\x3\x54\x2e\x40\x5e\x8f\x59\x8e\xc5\x90\xd5\x27\x2d\xba\x29\x2e\xcb\x1b\x70\x44\x1e\x65\x91\x6e\x2a\x79\x22\xda\x64",
+	.nonce	= "\x5\xa3\x93\xed\x30\xc5\xa2\x6",
+	.assoc	= "\xb1\x69\x83\x87\x30\xaa\x5d\xb8\x77\xe8\x21\xff\x6\x59\x35\xce\x75\xfe\x38\xef\xb8\x91\x43\x8c\xcf\x70\xdd\xa\x68\xbf\xd4\xbc\x16\x76\x99\x36\x1e\x58\x79\x5e\xd4\x29\xf7\x33\x93\x48\xdb\x5f\x1\xae\x9c\xb6\xe4\x88\x6d\x2b\x76\x75\xe0\xf3\x74\xe2\xc9",
+	.alen	= 63,
+	.input	= "\x74\xa6\x3e\xe4\xb1\xcb\xaf\xb0\x40\xe5\xf\x9e\xf1\xf2\x89\xb5\x42\x34\x8a\xa1\x3\xb7\xe9\x57\x46\xbe\x20\xe4\x6e\xb0\xeb\xff\xea\x7\x7e\xef\xe2\x55\x9f\xe5\x78\x3a\xb7\x83\xc2\x18\x40\x7b\xeb\xcd\x81\xfb\x90\x12\x9e\x46\xa9\xd6\x4a\xba\xb0\x62\xdb\x6b\x99\xc4\xdb\x54\x4b\xb8\xa5\x71\xcb\xcd\x63\x32\x55\xfb\x31\xf0\x38\xf5\xbe\x78\xe4\x45\xce\x1b\x6a\x5b\xe\xf4\x16\xe4\xb1\x3d\xf6\x63\x7b\xa7\xc\xde\x6f\x8f\x74\xdf\xe0\x1e\x9d\xce\x8f\x24\xef\x23\x35\x33\x7b\x83\x34\x23\x58\x74\x14\x77\x1f\xc2\x4f\x4e\xc6\x89\xf9\x52\x9\x37\x64\x14\xc4\x1\x6b\x9d\x77\xe8\x90\x5d\xa8\x4a\x2a\xef\x5c\x7f\xeb\xbb\xb2\xc6\x93\x99\x66\xdc\x7f\xd4\x9e\x2a\xca\x8d\xdb\xe7\x20\xcf\xe4\x73\xae\x49\x7d\x64\xf\xe\x28\x46\xa9\xa8\x32\xe4\xe\xf6\x51\x53\xb8\x3c\xb1\xff\xa3\x33\x41\x75\xff\xf1\x6f\xf1\xfb\xbb\x83\x7f\x6\x9b\xe7\x1b\xa\xe0\x5c\x33\x60\x5b\xdb\x5b\xed\xfe\xa5\x16\x19\x72\xa3\x64\x23\x0\x2\xc7\xf3\x6a\x81\x3e\x44\x1d\x79\x15\x5f\x9a\xde\xe2\xfd\x1b\x73\xc1\xbc\x23\xba\x31\xd2\x50\xd5\xad\x7f\x74\xa7\xc9\xf8\x3e\x2b\x26\x10\xf6\x3\x36\x74\xe4\xe\x6a\x72\xb7\x73\xa\x42\x28\xc2\xad\x5e\x3\xbe\xb8\xb\xa8\x5b\xd4\xb8\xba\x52\x89\xb1\x9b\xc1\xc3\x65\x87\xed\xa5\xf4\x86\xfd\x41\x80\x91\x27\x59\x53\x67\x15\x78\x54\x8b\x2d\x3d\xc7\xff\x2\x92\x7\x5f\x7a\x4b\x60\x59\x3c\x6f\x5c\xd8\xec\x95\xd2\xfe\xa0\x3b\xd8\x3f\xd1\x69\xa6\xd6\x41\xb2\xf4\x4d\x12\xf4\x58\x3e\x66\x64\x80\x31\x9b\xa8\x4c\x8b\x7\xb2\xec\x66\x94\x66\x47\x50\x50\x5f\x18\xb\xe\xd6\xc0\x39\x21\x13\x9e\x33\xbc\x79\x36\x2\x96\x70\xf0\x48\x67\x2f\x26\xe9\x6d\x10\xbb\xd6\x3f\xd1\x64\x7a\x2e\xbe\xc\x61\xf0\x75\x42\x38\x23\xb1\x9e\x9f\x7c\x67\x66\xd9\x58\x9a\xf1\xbb\x41\x2a\x8d\x65\x84\x94\xfc\xdc\x6a\x50\x64\xdb\x56\x33\x76\x0\x10\xed\xbe\xd2\x12\xf6\xf6\x1b\xa2\x16\xde\xae\x31\x95\xdd\xb1\x8\x7e\x4e\xee\xe7\xf9\xa5\xfb\x5b\x61\x43\x0\x40\xf6\x7e\x2\x4\x32\x4e\xc\xe2\x66\xd\xd7\x7\x98\xe\xf8\x72\x34\x6d\x95\x86\xd7\xcb\x31\x54\x47\xd0\x38\x29\x9c\x5a\x68\xd4\x87\x76\xc9\xe7\x7e\xe3\xf4\x81\x6d\x18\xcb\xc9\x5\xaf\xa0\xfb\x66\xf7\xf1\x1c\xc6\x14\x11\x4f\x2b\x79\x42\x8b\xbc\xac\xe7\x6c\xfe\xf\x58\xe7\x7c\x78\x39\x30\xb0\x66\x2c\x9b\x6d\x3a\xe1\xcf\xc9\xa4\xe\x6d\x6d\x8a\xa1\x3a\xe7\x28\xd4\x78\x4c\xa6\xa2\x2a\xa6\x3\x30\xd7\xa8\x25\x66\x87\x2f\x69\x5c\x4e\xdd\xa5\x49\x5d\x37\x4a\x59\xc4\xaf\x1f\xa2\xe4\xf8\xa6\x12\x97\xd5\x79\xf5\xe2\x4a\x2b\x5f\x61\xe4\x9e\xe3\xee\xb8\xa7\x5b\x2f\xf4\x9e\x6c\xfb\xd1\xc6\x56\x77\xba\x75\xaa\x3d\x1a\xa8\xb\xb3\x68\x24\x0\x10\x7f\xfd\xd7\xa1\x8d\x83\x54\x4f\x1f\xd8\x2a\xbe\x8a\xc\x87\xab\xa2\xde\xc3\x39\xbf\x9\x3\xa5\xf3\x5\x28\xe1\xe1\xee\x39\x70\x9c\xd8\x81\x12\x1e\x2\x40\xd2\x6e\xf0\xeb\x1b\x3d\x22\xc6\xe5\xe3\xb4\x5a\x98\xbb\xf0\x22\x28\x8d\xe5\xd3\x16\x48\x24\xa5\xe6\x66\xc\xf9\x8\xf9\x7e\x1e\xe1\x28\x26\x22\xc7\xc7\xa\x32\x47\xfa\xa3\xbe\x3c\xc4\xc5\x53\xa\xd5\x94\x4a\xd7\x93\xd8\x42\x99\xb9\xa\xdb\x56\xf7\xb9\x1c\x53\x4f\xfa\xd3\x74\xad\xd9\x68\xf1\x1b\xdf\x61\xc6\x5e\xa8\x48\xfc\xd4\x4a\x4c\x3c\x32\xf7\x1c\x96\x21\x9b\xf9\xa3\xcc\x5a\xce\xd5\xd7\x8\x24\xf6\x1c\xfd\xdd\x38\xc2\x32\xe9\xb8\xe7\xb6\xfa\x9d\x45\x13\x2c\x83\xfd\x4a\x69\x82\xcd\xdc\xb3\x76\xc\x9e\xd8\xf4\x1b\x45\x15\xb4\x97\xe7\x58\x34\xe2\x3\x29\x5a\xbf\xb6\xe0\x5d\x13\xd9\x2b\xb4\x80\xb2\x45\x81\x6a\x2e\x6c\x89\x7d\xee\xbb\x52\xdd\x1f\x18\xe7\x13\x6b\x33\xe\xea\x36\x92\x77\x7b\x6d\x9c\x5a\x5f\x45\x7b\x7b\x35\x62\x23\xd1\xbf\xf\xd0\x8\x1b\x2b\x80\x6b\x7e\xf1\x21\x47\xb0\x57\xd1\x98\x72\x90\x34\x1c\x20\x4\xff\x3d\x5c\xee\xe\x57\x5f\x6f\x24\x4e\x3c\xea\xfc\xa5\xa9\x83\xc9\x61\xb4\x51\x24\xf8\x27\x5e\x46\x8c\xb1\x53\x2\x96\x35\xba\xb8\x4c\x71\xd3\x15\x59\x35\x22\x20\xad\x3\x9f\x66\x44\x3b\x9c\x35\x37\x1f\x9b\xbb\xf3\xdb\x35\x63\x30\x64\xaa\xa2\x6\xa8\x5d\xbb\xe1\x9f\x70\xec\x82\x11\x6\x36\xec\x8b\x69\x66\x24\x44\xc9\x4a\x57\xbb\x9b\x78\x13\xce\x9c\xc\xba\x92\x93\x63\xb8\xe2\x95\xf\xf\x16\x39\x52\xfd\x3a\x6d\x2\x4b\xdf\x13\xd3\x2a\x22\xb4\x3\x7c\x54\x49\x96\x68\x54\x10\xfa\xef\xaa\x6c\xe8\x22\xdc\x71\x16\x13\x1a\xf6\x28\xe5\x6d\x77\x3d\xcd\x30\x63\xb1\x70\x52\xa1\xc5\x94\x5f\xcf\xe8\xb8\x26\x98\xf7\x6\xa0\xa\x70\xfa\x3\x80\xac\xc1\xec\xd6\x4c\x54\xd7\xfe\x47\xb6\x88\x4a\xf7\x71\x24\xee\xf3\xd2\xc2\x4a\x7f\xfe\x61\xc7\x35\xc9\x37\x67\xcb\x24\x35\xda\x7e\xca\x5f\xf3\x8d\xd4\x13\x8e\xd6\xcb\x4d\x53\x8f\x53\x1f\xc0\x74\xf7\x53\xb9\x5e\x23\x37\xba\x6e\xe3\x9d\x7\x55\x25\x7b\xe6\x2a\x64\xd1\x32\xdd\x54\x1b\x4b\xc0\xe1\xd7\x69\x58\xf8\x93\x29\xc4\xdd\x23\x2f\xa5\xfc\x9d\x7e\xf8\xd4\x90\xcd\x82\x55\xdc\x16\x16\x9f\x7\x52\x9b\x9d\x25\xed\x32\xc5\x7b\xdf\xf6\x83\x46\x3d\x65\xb7\xef\x87\x7a\x12\x69\x8f\x6\x7c\x51\x15\x4a\x8\xe8\xac\x9a\xc\x24\xa7\x27\xd8\x46\x2f\xe7\x1\xe\x1c\xc6\x91\xb0\x6e\x85\x65\xf0\x29\xd\x2e\x6b\x3b\xfb\x4b\xdf\xe4\x80\x93\x3\x66\x46\x3e\x8a\x6e\xf3\x5e\x4d\x62\xe\x49\x5\xaf\xd4\xf8\x21\x20\x61\x1d\x39\x17\xf4\x61\x47\x95\xfb\x15\x2e\xb3\x4f\xd0\x5d\xf5\x7d\x40\xda\x90\x3c\x6b\xcb\x17\x0\x13\x3b\x64\x34\x1b\xf0\xf2\xe5\x3b\xb2\xc7\xd3\x5f\x3a\x44\xa6\x9b\xb7\x78\xe\x42\x5d\x4c\xc1\xe9\xd2\xcb\xb7\x78\xd1\xfe\x9a\xb5\x7\xe9\xe0\xbe\xe2\x8a\xa7\x1\x83\x0\x8c\x5c\x8\xe6\x63\x12\x92\xb7\xb7\xa6\x19\x7d\x38\x13\x38\x92\x87\x24\xf9\x48\xb3\x5e\x87\x6a\x40\x39\x5c\x3f\xed\x8f\xee\xdb\x15\x82\x6\xda\x49\x21\x2b\xb5\xbf\x32\x7c\x9f\x42\x28\x63\xcf\xaf\x1e\xf8\xc6\xa0\xd1\x2\x43\x57\x62\xec\x9b\xf\x1\x9e\x71\xd8\x87\x9d\x1\xc1\x58\x77\xd9\xaf\xb1\x10\x7e\xdd\xa6\x50\x96\xe5\xf0\x72\x0\x6d\x4b\xf8\x2a\x8f\x19\xf3\x22\x88\x11\x4a\x8b\x7c\xfd\xb7\xed\xe1\xf6\x40\x39\xe0\xe9\xf6\x3d\x25\xe6\x74\x3c\x58\x57\x7f\xe1\x22\x96\x47\x31\x91\xba\x70\x85\x28\x6b\x9f\x6e\x25\xac\x23\x66\x2f\x29\x88\x28\xce\x8c\x5c\x88\x53\xd1\x3b\xcc\x6a\x51\xb2\xe1\x28\x3f\x91\xb4\xd\x0\x3a\xe3\xf8\xc3\x8f\xd7\x96\x62\xe\x2e\xfc\xc8\x6c\x77\xa6\x1d\x22\xc1\xb8\xe6\x61\xd7\x67\x36\x13\x7b\xbb\x9b\x59\x9\xa6\xdf\xf7\x6b\xa3\x40\x1a\xf5\x4f\xb4\xda\xd3\xf3\x81\x93\xc6\x18\xd9\x26\xee\xac\xf0\xaa\xdf\xc5\x9c\xca\xc2\xa2\xcc\x7b\x5c\x24\xb0\xbc\xd0\x6a\x4d\x89\x9\xb8\x7\xfe\x87\xad\xa\xea\xb8\x42\xf9\x5e\xb3\x3e\x36\x4c\xaf\x75\x9e\x1c\xeb\xbd\xbc\xbb\x80\x40\xa7\x3a\x30\xbf\xa8\x44\xf4\xeb\x38\xad\x29\xba\x23\xed\x41\xc\xea\xd2\xbb\x41\x18\xd6\xb9\xba\x65\x2b\xa3\x91\x6d\x1f\xa9\xf4\xd1\x25\x8d\x4d\x38\xff\x64\xa0\xec\xde\xa6\xb6\x79\xab\x8e\x33\x6c\x47\xde\xaf\x94\xa4\xa5\x86\x77\x55\x9\x92\x81\x31\x76\xc7\x34\x22\x89\x8e\x3d\x26\x26\xd7\xfc\x1e\x16\x72\x13\x33\x63\xd5\x22\xbe\xb8\x4\x34\x84\x41\xbb\x80\xd0\x9f\x46\x48\x7\xa7\xfc\x2b\x3a\x75\x55\x8c\xc7\x6a\xbd\x7e\x46\x8\x84\xf\xd5\x74\xc0\x82\x8e\xaa\x61\x5\x1\xb2\x47\x6e\x20\x6a\x2d\x58\x70\x48\x32\xa7\x37\xd2\xb8\x82\x1a\x51\xb9\x61\xdd\xfd\x9d\x6b\xe\x18\x97\xf8\x45\x5f\x87\x10\xcf\x34\x72\x45\x26\x49\x70\xe7\xa3\x78\xe0\x52\x89\x84\x94\x83\x82\xc2\x69\x8f\xe3\xe1\x3f\x60\x74\x88\xc4\xf7\x75\x2c\xfb\xbd\xb6\xc4\x7e\x10\xa\x6c\x90\x4\x9e\xc3\x3f\x59\x7c\xce\x31\x18\x60\x57\x73\x46\x94\x7d\x6\xa0\x6d\x44\xec\xa2\xa\x9e\x5\x15\xef\xca\x5c\xbf\x0\xeb\xf7\x3d\x32\xd4\xa5\xef\x49\x89\x5e\x46\xb0\xa6\x63\x5b\x8a\x73\xae\x6f\xd5\x9d\xf8\x4f\x40\xb5\xb2\x6e\xd3\xb6\x1\xa9\x26\xa2\x21\xcf\x33\x7a\x3a\xa4\x23\x13\xb0\x69\x6a\xee\xce\xd8\x9d\x1\x1d\x50\xc1\x30\x6c\xb1\xcd\xa0\xf0\xf0\xa2\x64\x6f\xbb\xbf\x5e\xe6\xab\x87\xb4\xf\x4f\x15\xaf\xb5\x25\xa1\xb2\xd0\x80\x2c\xfb\xf9\xfe\xd2\x33\xbb\x76\xfe\x7c\xa8\x66\xf7\xe7\x85\x9f\x1f\x85\x57\x88\xe1\xe9\x63\xe4\xd8\x1c\xa1\xfb\xda\x44\x5\x2e\x1d\x3a\x1c\xff\xc8\x3b\xc0\xfe\xda\x22\xb\x43\xd6\x88\x39\x4c\x4a\xa6\x69\x18\x93\x42\x4e\xb5\xcc\x66\xd\x9\xf8\x1e\x7c\xd3\x3c\x99\xd\x50\x1d\x62\xe9\x57\x6\xbf\x19\x88\xdd\xad\x7b\x4f\xf9\xc7\x82\x6d\x8d\xc8\xc4\xc5\x78\x17\x20\x15\xc5\x52\x41\xcf\x5b\xd6\x7f\x94\x2\x41\xe0\x40\x22\x3\x5e\xd1\x53\xd4\x86\xd3\x2c\x9f\xf\x96\xe3\x6b\x9a\x76\x32\x6\x47\x4b\x11\xb3\xdd\x3\x65\xbd\x9b\x1\xda\x9c\xb9\x7e\x3f\x6a\xc4\x7b\xea\xd4\x3c\xb9\xfb\x5c\x6b\x64\x33\x52\xba\x64\x78\x8f\xa4\xaf\x7a\x61\x8d\xbc\xc5\x73\xe9\x6b\x58\x97\x4b\xbf\x63\x22\xd3\x37\x2\x54\xc5\xb9\x16\x4a\xf0\x19\xd8\x94\x57\xb8\x8a\xb3\x16\x3b\xd0\x84\x8e\x67\xa6\xa3\x7d\x78\xec\x0",
+	.ilen	= 2011,
+	.result	= "\x52\x34\xb3\x65\x3b\xb7\xe5\xd3\xab\x49\x17\x60\xd2\x52\x56\xdf\xdf\x34\x56\x82\xe2\xbe\xe5\xe1\x28\xd1\x4e\x5f\x4f\x1\x7d\x3f\x99\x6b\x30\x6e\x1a\x7c\x4c\x8e\x62\x81\xae\x86\x3f\x6b\xd0\xb5\xa9\xcf\x50\xf1\x2\x12\xa0\xb\x24\xe9\xe6\x72\x89\x2c\x52\x1b\x34\x38\xf8\x75\x5f\xa0\x74\xe2\x99\xdd\xa6\x4b\x14\x50\x4e\xf1\xbe\xd6\x9e\xdb\xb2\x24\x27\x74\x12\x4a\x78\x78\x17\xa5\x58\x8e\x2f\xf9\xf4\x8d\xee\x3\x88\xae\xb8\x29\xa1\x2f\x4b\xee\x92\xbd\x87\xb3\xce\x34\x21\x57\x46\x4\x49\xc\x80\xf2\x1\x13\xa1\x55\xb3\xff\x44\x30\x3c\x1c\xd0\xef\xbc\x18\x74\x26\xad\x41\x5b\x5b\x3e\x9a\x7a\x46\x4f\x16\xd6\x74\x5a\xb7\x3a\x28\x31\xd8\xae\x26\xac\x50\x53\x86\xf2\x56\xd7\x3f\x29\xbc\x45\x68\x8e\xcb\x98\x64\xdd\xc9\xba\xb8\x4b\x7b\x82\xdd\x14\xa7\xcb\x71\x72\x0\x5c\xad\x7b\x6a\x89\xa4\x3d\xbf\xb5\x4b\x3e\x7c\x5a\xcf\xb8\xa1\xc5\x6e\xc8\xb6\x31\x57\x7b\xdf\xa5\x7e\xb1\xd6\x42\x2a\x31\x36\xd1\xd0\x3f\x7a\xe5\x94\xd6\x36\xa0\x6f\xb7\x40\x7d\x37\xc6\x55\x7c\x50\x40\x6d\x29\x89\xe3\x5a\xae\x97\xe7\x44\x49\x6e\xbd\x81\x3d\x3\x93\x6\x12\x6\xe2\x41\x12\x4a\xf1\x6a\xa4\x58\xa2\xfb\xd2\x15\xba\xc9\x79\xc9\xce\x5e\x13\xbb\xf1\x9\x4\xcc\xfd\xe8\x51\x34\x6a\xe8\x61\x88\xda\xed\x1\x47\x84\xf5\x73\x25\xf9\x1c\x42\x86\x7\xf3\x5b\x1a\x1\xb3\xeb\x24\x32\x8d\xf6\xed\x7c\x4b\xeb\x3c\x36\x42\x28\xdf\xdf\xb6\xbe\xd9\x8c\x52\xd3\x2b\x8\x90\x8c\xe7\x98\x31\xe2\x32\x8e\xfc\x11\x48\x0\xa8\x6a\x42\x4a\x2\xc6\x4b\x9\xf1\xe3\x49\xf3\x45\x1f\xe\xbc\x56\xe2\xe4\xdf\xfb\xeb\x61\xfa\x24\xc1\x63\x75\xbb\x47\x75\xaf\xe1\x53\x16\x96\x21\x85\x26\x11\xb3\x76\xe3\x23\xa1\x6b\x74\x37\xd0\xde\x6\x90\x71\x5d\x43\x88\x9b\x0\x54\xa6\x75\x2f\xa1\xc2\xb\x73\x20\x1d\xb6\x21\x79\x57\x3f\xfa\x9\xbe\x8a\x33\xc3\x52\xf0\x1d\x82\x31\xd1\x55\xb5\x6c\x99\x25\xcf\x5c\x32\xce\xe9\xd\xfa\x69\x2c\xd5\xd\xc5\x6d\x86\xd0\xc\x3b\x6\x50\x79\xe8\xc3\xae\x4\xe6\xcd\x51\xe4\x26\x9b\x4f\x7e\xa6\xf\xab\xd8\xe5\xde\xa9\x0\x95\xbe\xa3\x9d\x5d\xb2\x9\x70\x18\x1c\xf0\xac\x29\x23\x2\x29\x28\xd2\x74\x35\x57\x62\xf\x24\xea\x5e\x33\xc2\x92\xf3\x78\x4d\x30\x1e\xa1\x99\xa9\x82\xb0\x42\x31\x8d\xad\x8a\xbc\xfc\xd4\x57\x47\x3e\xb4\x50\xdd\x6e\x2c\x80\x4d\x22\xf1\xfb\x57\xc4\xdd\x17\xe1\x8a\x36\x4a\xb3\x37\xca\xc9\x4e\xab\xd5\x69\xc4\xf4\xbc\xb\x3b\x44\x4b\x29\x9c\xee\xd4\x35\x22\x21\xb0\x1f\x27\x64\xa8\x51\x1b\xf0\x9f\x19\x5c\xfb\x5a\x64\x74\x70\x45\x9\xf5\x64\xfe\x1a\x2d\xc9\x14\x4\x14\xcf\xd5\x7d\x60\xaf\x94\x39\x94\xe2\x7d\x79\x82\xd0\x65\x3b\x6b\x9c\x19\x84\xb4\x6d\xb3\xc\x99\xc0\x56\xa8\xbd\x73\xce\x5\x84\x3e\x30\xaa\xc4\x9b\x1b\x4\x2a\x9f\xd7\x43\x2b\x23\xdf\xbf\xaa\xd5\xc2\x43\x2d\x70\xab\xdc\x75\xad\xac\xf7\xc0\xbe\x67\xb2\x74\xed\x67\x10\x4a\x92\x60\xc1\x40\x50\x19\x8a\x8a\x8c\x9\xe\x72\xe1\x73\x5e\xe8\x41\x85\x63\x9f\x3f\xd7\x7d\xc4\xfb\x22\x5d\x92\x6c\xb3\x1e\xe2\x50\x2f\x82\xa8\x28\xc0\xb5\xd7\x5f\x68\xd\x2c\x2d\xaf\x7e\xfa\x2e\x8\xf\x1f\x70\x9f\xe9\x19\x72\x55\xf8\xfb\x51\xd2\x33\x5d\xa0\xd3\x2b\xa\x6c\xbc\x4e\xcf\x36\x4d\xdc\x3b\xe9\x3e\x81\x7c\x61\xdb\x20\x2d\x3a\xc3\xb3\xc\x1e\x0\xb9\x7c\xf5\xca\x10\x5f\x3a\x71\xb3\xe4\x20\xdb\xc\x2a\x98\x63\x45\x0\x58\xf6\x68\xe4\xb\xda\x13\x3b\x60\x5c\x76\xdb\xb9\x97\x71\xe4\xd9\xb7\xdb\xbd\x68\xc7\x84\x84\xaa\x7c\x68\x62\x5e\x16\xfc\xba\x72\xaa\x9a\xa9\xeb\x7c\x75\x47\x97\x7e\xad\xe2\xd9\x91\xe8\xe4\xa5\x31\xd7\x1\x8e\xa2\x11\x88\x95\xb9\xf2\x9b\xd3\x7f\x1b\x81\x22\xf7\x98\x60\xa\x64\xa6\xc1\xf6\x49\xc7\xe3\x7\x4d\x94\x7a\xcf\x6e\x68\xc\x1b\x3f\x6e\x2e\xee\x92\xfa\x52\xb3\x59\xf8\xf1\x8f\x6a\x66\xa3\x82\x76\x4a\x7\x1a\xc7\xdd\xf5\xda\x9c\x3c\x24\xbf\xfd\x42\xa1\x10\x64\x6a\xf\x89\xee\x36\xa5\xce\x99\x48\x6a\xf0\x9f\x9e\x69\xa4\x40\x20\xe9\x16\x15\xf7\xdb\x75\x2\xcb\xe9\x73\x8b\x3b\x49\x2f\xf0\xaf\x51\x6\x5c\xdf\x27\x27\x49\x6a\xd1\xcc\xc7\xb5\x63\xb5\xfc\xb8\x5c\x87\x7f\x84\xb4\xcc\x14\xa9\x53\xda\xa4\x56\xf8\xb6\x1b\xcc\x40\x27\x52\x6\x5a\x13\x81\xd7\x3a\xd4\x3b\xfb\x49\x65\x31\x33\xb2\xfa\xcd\xad\x58\x4e\x2b\xae\xd2\x20\xfb\x1a\x48\xb4\x3f\x9a\xd8\x7a\x35\x4a\xc8\xee\x88\x5e\x7\x66\x54\xb9\xec\x9f\xa3\xe3\xb9\x37\xaa\x49\x76\x31\xda\x74\x2d\x3c\xa4\x65\x10\x32\x38\xf0\xde\xd3\x99\x17\xaa\x71\xaa\x8f\xf\x8c\xaf\xa2\xf8\x5d\x64\xba\x1d\xa3\xef\x96\x73\xe8\xa1\x2\x8d\xc\x6d\xb8\x6\x90\xb8\x8\x56\x2c\xa7\x6\xc9\xc2\x38\xdb\x7c\x63\xb1\x57\x8e\xea\x7c\x79\xf3\x49\x1d\xfe\x9f\xf3\x6e\xb1\x1d\xba\x19\x80\x1a\xa\xd3\xb0\x26\x21\x40\xb1\x7c\xf9\x4d\x8d\x10\xc1\x7e\xf4\xf6\x3c\xa8\xfd\x7c\xa3\x92\xb2\xf\xaa\xcc\xa6\x11\xfe\x4\xe3\xd1\x7a\x32\x89\xdf\xd\xc4\x8f\x79\x6b\xca\x16\x7c\x6e\xf9\xad\xf\xf6\xfe\x27\xdb\xc4\x13\x70\xf1\x62\x1a\x4f\x79\x40\xc9\x9b\x8b\x21\xea\x84\xfa\xf5\xf1\x89\xce\xb7\x55\xa\x80\x39\x2f\x55\x36\x16\x9c\x7b\x8\xbd\x87\xd\xa5\x32\xf1\x52\x7c\xe8\x55\x60\x5b\xd7\x69\xe4\xfc\xfa\x12\x85\x96\xea\x50\x28\xab\x8a\xf7\xbb\xe\x53\x74\xca\xa6\x27\x9\xc2\xb5\xde\x18\x14\xd9\xea\xe5\x29\x1c\x40\x56\xcf\xd7\xae\x5\x3f\x65\xaf\x5\x73\xe2\x35\x96\x27\x7\x14\xc0\xad\x33\xf1\xdc\x44\x7a\x89\x17\x77\xd2\x9c\x58\x60\xf0\x3f\x7b\x2d\x2e\x57\x95\x54\x87\xed\xf2\xc7\x4c\xf0\xae\x56\x29\x19\x7d\x66\x4b\x9b\x83\x84\x42\x3b\x1\x25\x66\x8e\x2\xde\xb9\x83\x54\x19\xf6\x9f\x79\xd\x67\xc5\x1d\x7a\x44\x2\x98\xa7\x16\x1c\x29\xd\x74\xff\x85\x40\x6\xef\x2c\xa9\xc6\xf5\x53\x7\x6\xae\xe4\xfa\x5f\xd8\x39\x4d\xf1\x9b\x6b\xd9\x24\x84\xfe\x3\x4c\xb2\x3f\xdf\xa1\x5\x9e\x50\x14\x5a\xd9\x1a\xa2\xa7\xfa\xfa\x17\xf7\x78\xd6\xb5\x92\x61\x91\xac\x36\xfa\x56\xd\x38\x32\x18\x85\x8\x58\x37\xf0\x4b\xdb\x59\xe7\xa4\x34\xc0\x1b\x1\xaf\x2d\xde\xa1\xaa\x5d\xd3\xec\xe1\xd4\xf7\xe6\x54\x68\xf0\x51\x97\xa7\x89\xea\x24\xad\xd3\x6e\x47\x93\x8b\x4b\xb4\xf7\x1c\x42\x6\x67\xe8\x99\xf6\xf5\x7b\x85\xb5\x65\xb5\xb5\xd2\x37\xf5\xf3\x2\xa6\x4d\x11\xa7\xdc\x51\x9\x7f\xa0\xd8\x88\x1c\x13\x71\xae\x9c\xb7\x7b\x34\xd6\x4e\x68\x26\x83\x51\xaf\x1d\xee\x8b\xbb\x69\x43\x2b\x9e\x8a\xbc\x2\xe\xa0\x1b\xe0\xa8\x5f\x6f\xaf\x1b\x8f\xe7\x64\x71\x74\x11\x7e\xa8\xd8\xf9\x97\x6\xc3\xb6\xfb\xfb\xb7\x3d\x35\x9d\x3b\x52\xed\x54\xca\xf4\x81\x1\x2d\x1b\xc3\xa7\x0\x3d\x1a\x39\x54\xe1\xf6\xff\xed\x6f\xb\x5a\x68\xda\x58\xdd\xa9\xcf\x5c\x4a\xe5\x9\x4e\xde\x9d\xbc\x3e\xee\x5a\x0\x3b\x2c\x87\x10\x65\x60\xdd\xd7\x56\xd1\x4c\x64\x45\xe4\x21\xec\x78\xf8\x25\x7a\x3e\x16\x5d\x9\x53\x14\xbe\x4f\xae\x87\xd8\xd1\xaa\x3c\xf6\x3e\xa4\x70\x8c\x5e\x70\xa4\xb3\x6b\x66\x73\xd3\xbf\x31\x6\x19\x62\x93\x15\xf2\x86\xe4\x52\x7e\x53\x4c\x12\x38\xcc\x34\x7d\x57\xf6\x42\x93\x8a\xc4\xee\x5c\x8a\xe1\x52\x8f\x56\x64\xf6\xa6\xd1\x91\x57\x70\xcd\x11\x76\xf5\x59\x60\x60\x3c\xc1\xc3\xb\x7f\x58\x1a\x50\x91\xf1\x68\x8f\x6e\x74\x74\xa8\x51\xb\xf7\x7a\x98\x37\xf2\xa\xe\xa4\x97\x4\xb8\x9b\xfd\xa0\xea\xf7\xd\xe1\xdb\x3\xf0\x31\x29\xf8\xdd\x6b\x8b\x5d\xd8\x59\xa9\x29\xcf\x9a\x79\x89\x19\x63\x46\x9\x79\x6a\x11\xda\x63\x68\x48\x77\x23\xfb\x7d\x3a\x43\xcb\x2\x3b\x7a\x6d\x10\x2a\x9e\xac\xf1\xd4\x19\xf8\x23\x64\x1d\x2c\x5f\xf2\xb0\x5c\x23\x27\xf7\x27\x30\x16\x37\xb1\x90\xab\x38\xfb\x55\xcd\x78\x58\xd4\x7d\x43\xf6\x45\x5e\x55\x8d\xb1\x2\x65\x58\xb4\x13\x4b\x36\xf7\xcc\xfe\x3d\xb\x82\xe2\x12\x11\xbb\xe6\xb8\x3a\x48\x71\xc7\x50\x6\x16\x3a\xe6\x7c\x5\xc7\xc8\x4d\x2f\x8\x6a\x17\x9a\x95\x97\x50\x68\xdc\x28\x18\xc4\x61\x38\xb9\xe0\x3e\x78\xdb\x29\xe0\x9f\x52\xdd\xf8\x4f\x91\xc1\xd0\x33\xa1\x7a\x8e\x30\x13\x82\x7\x9f\xd3\x31\xf\x23\xbe\x32\x5a\x75\xcf\x96\xb2\xec\xb5\x32\xac\x21\xd1\x82\x33\xd3\x15\x74\xbd\x90\xf1\x2c\xe6\x5f\x8d\xe3\x2\xe8\xe9\xc4\xca\x96\xeb\xe\xbc\x91\xf4\xb9\xea\xd9\x1b\x75\xbd\xe1\xac\x2a\x5\x37\x52\x9b\x1b\x3f\x5a\xdc\x21\xc3\x98\xbb\xaf\xa3\xf2\x0\xbf\xd\x30\x89\x5\xcc\xa5\x76\xf5\x6\xf0\xc6\x54\x8a\x5d\xd4\x1e\xc1\xf2\xce\xb0\x62\xc8\xfc\x59\x42\x9a\x90\x60\x55\xfe\x88\xa5\x8b\xb8\x33\xc\x23\x24\xd\x15\x70\x37\x1e\x3d\xf6\xd2\xea\x92\x10\xb2\xc4\x51\xac\xf2\xac\xf3\x6b\x6c\xaa\xcf\x12\xc5\x6c\x90\x50\xb5\xc\xfc\x1a\x15\x52\xe9\x26\xc6\x52\xa4\xe7\x81\x69\xe1\xe7\x9e\x30\x1\xec\x84\x89\xb2\xd\x66\xdd\xce\x28\x5c\xec\x98\x46\x68\x21\x9f\x88\x3f\x1f\x42\x77\xce\xd0\x61\xd4\x20\xa7\xff\x53\xad\x37\xd0\x17\x35\xc9\xfc\xba\xa\x78\x3f\xf2\xcc\x86\x89\xe8\x4b\x3c\x48\x33\x9\x7f\xc6\xc0\xdd\xb8\xfd\x7a\x66\x66\x65\xeb\x47\xa7\x4\x28\xa3\x19\x8e\xa9\xb1\x13\x67\x62\x70\xcf\xd6"
+} };
+static const struct chacha20poly1305_testvec chacha20poly1305_dec_vectors[] __initconst = { {
+	.key	= "\x1c\x92\x40\xa5\xeb\x55\xd3\x8a\xf3\x33\x88\x86\x04\xf6\xb5\xf0\x47\x39\x17\xc1\x40\x2b\x80\x09\x9d\xca\x5c\xbc\x20\x70\x75\xc0",
+	.nonce	= "\x01\x02\x03\x04\x05\x06\x07\x08",
+	.assoc	= "\xf3\x33\x88\x86\x00\x00\x00\x00\x00\x00\x4e\x91",
+	.alen	= 12,
+	.input	= "\x64\xa0\x86\x15\x75\x86\x1a\xf4\x60\xf0\x62\xc7\x9b\xe6\x43\xbd\x5e\x80\x5c\xfd\x34\x5c\xf3\x89\xf1\x08\x67\x0a\xc7\x6c\x8c\xb2\x4c\x6c\xfc\x18\x75\x5d\x43\xee\xa0\x9e\xe9\x4e\x38\x2d\x26\xb0\xbd\xb7\xb7\x3c\x32\x1b\x01\x00\xd4\xf0\x3b\x7f\x35\x58\x94\xcf\x33\x2f\x83\x0e\x71\x0b\x97\xce\x98\xc8\xa8\x4a\xbd\x0b\x94\x81\x14\xad\x17\x6e\x00\x8d\x33\xbd\x60\xf9\x82\xb1\xff\x37\xc8\x55\x97\x97\xa0\x6e\xf4\xf0\xef\x61\xc1\x86\x32\x4e\x2b\x35\x06\x38\x36\x06\x90\x7b\x6a\x7c\x02\xb0\xf9\xf6\x15\x7b\x53\xc8\x67\xe4\xb9\x16\x6c\x76\x7b\x80\x4d\x46\xa5\x9b\x52\x16\xcd\xe7\xa4\xe9\x90\x40\xc5\xa4\x04\x33\x22\x5e\xe2\x82\xa1\xb0\xa0\x6c\x52\x3e\xaf\x45\x34\xd7\xf8\x3f\xa1\x15\x5b\x00\x47\x71\x8c\xbc\x54\x6a\x0d\x07\x2b\x04\xb3\x56\x4e\xea\x1b\x42\x22\x73\xf5\x48\x27\x1a\x0b\xb2\x31\x60\x53\xfa\x76\x99\x19\x55\xeb\xd6\x31\x59\x43\x4e\xce\xbb\x4e\x46\x6d\xae\x5a\x10\x73\xa6\x72\x76\x27\x09\x7a\x10\x49\xe6\x17\xd9\x1d\x36\x10\x94\xfa\x68\xf0\xff\x77\x98\x71\x30\x30\x5b\xea\xba\x2e\xda\x04\xdf\x99\x7b\x71\x4d\x6c\x6f\x2c\x29\xa6\xad\x5c\xb4\x02\x2b\x02\x70\x9b\xee\xad\x9d\x67\x89\x0c\xbb\x22\x39\x23\x36\xfe\xa1\x85\x1f\x38",
+	.ilen	= 281,
+	.result	= "\x49\x6e\x74\x65\x72\x6e\x65\x74\x2d\x44\x72\x61\x66\x74\x73\x20\x61\x72\x65\x20\x64\x72\x61\x66\x74\x20\x64\x6f\x63\x75\x6d\x65\x6e\x74\x73\x20\x76\x61\x6c\x69\x64\x20\x66\x6f\x72\x20\x61\x20\x6d\x61\x78\x69\x6d\x75\x6d\x20\x6f\x66\x20\x73\x69\x78\x20\x6d\x6f\x6e\x74\x68\x73\x20\x61\x6e\x64\x20\x6d\x61\x79\x20\x62\x65\x20\x75\x70\x64\x61\x74\x65\x64\x2c\x20\x72\x65\x70\x6c\x61\x63\x65\x64\x2c\x20\x6f\x72\x20\x6f\x62\x73\x6f\x6c\x65\x74\x65\x64\x20\x62\x79\x20\x6f\x74\x68\x65\x72\x20\x64\x6f\x63\x75\x6d\x65\x6e\x74\x73\x20\x61\x74\x20\x61\x6e\x79\x20\x74\x69\x6d\x65\x2e\x20\x49\x74\x20\x69\x73\x20\x69\x6e\x61\x70\x70\x72\x6f\x70\x72\x69\x61\x74\x65\x20\x74\x6f\x20\x75\x73\x65\x20\x49\x6e\x74\x65\x72\x6e\x65\x74\x2d\x44\x72\x61\x66\x74\x73\x20\x61\x73\x20\x72\x65\x66\x65\x72\x65\x6e\x63\x65\x20\x6d\x61\x74\x65\x72\x69\x61\x6c\x20\x6f\x72\x20\x74\x6f\x20\x63\x69\x74\x65\x20\x74\x68\x65\x6d\x20\x6f\x74\x68\x65\x72\x20\x74\x68\x61\x6e\x20\x61\x73\x20\x2f\xe2\x80\x9c\x77\x6f\x72\x6b\x20\x69\x6e\x20\x70\x72\x6f\x67\x72\x65\x73\x73\x2e\x2f\xe2\x80\x9d"
+}, {
+	.key	= "\x4c\xf5\x96\x83\x38\xe6\xae\x7f\x2d\x29\x25\x76\xd5\x75\x27\x86\x91\x9a\x27\x7a\xfb\x46\xc5\xef\x94\x81\x79\x57\x14\x59\x40\x68",
+	.nonce	= "\xca\xbf\x33\x71\x32\x45\x77\x8e",
+	.assoc	= "",
+	.alen	= 0,
+	.input	= "\xea\xe0\x1e\x9e\x2c\x91\xaa\xe1\xdb\x5d\x99\x3f\x8a\xf7\x69\x92",
+	.ilen	= 16,
+	.result	= ""
+}, {
+	.key	= "\x2d\xb0\x5d\x40\xc8\xed\x44\x88\x34\xd1\x13\xaf\x57\xa1\xeb\x3a\x2a\x80\x51\x36\xec\x5b\xbc\x8\x93\x84\x21\xb5\x13\x88\x3c\xd",
+	.nonce	= "\x3d\x86\xb5\x6b\xc8\xa3\x1f\x1d",
+	.assoc	= "\x33\x10\x41\x12\x1f\xf3\xd2\x6b",
+	.alen	= 8,
+	.input	= "\xdd\x6b\x3b\x82\xce\x5a\xbd\xd6\xa9\x35\x83\xd8\x8c\x3d\x85\x77",
+	.ilen	= 16,
+	.result	= ""
+}, {
+	.key	= "\x4b\x28\x4b\xa3\x7b\xbe\xe9\xf8\x31\x80\x82\xd7\xd8\xe8\xb5\xa1\xe2\x18\x18\x8a\x9c\xfa\xa3\x3d\x25\x71\x3e\x40\xbc\x54\x7a\x3e",
+	.nonce	= "\xd2\x32\x1f\x29\x28\xc6\xc4\xc4",
+	.assoc	= "\x6a\xe2\xad\x3f\x88\x39\x5a\x40",
+	.alen	= 8,
+	.input	= "\xb7\x1b\xb0\x73\x59\xb0\x84\xb2\x6d\x8e\xab\x94\x31\xa1\xae\xac\x89",
+	.ilen	= 17,
+	.result	= "\xa4"
+}, {
+	.key	= "\x66\xca\x9c\x23\x2a\x4b\x4b\x31\xe\x92\x89\x8b\xf4\x93\xc7\x87\x98\xa3\xd8\x39\xf8\xf4\xa7\x1\xc0\x2e\xa\xa6\x7e\x5a\x78\x87",
+	.nonce	= "\x20\x1c\xaa\x5f\x9c\xbf\x92\x30",
+	.assoc	= "",
+	.alen	= 0,
+	.input	= "\xbf\xe1\x5b\xb\xdb\x6b\xf5\x5e\x6c\x5d\x84\x44\x39\x81\xc1\x9c\xac",
+	.ilen	= 17,
+	.result	= "\x2d"
+}, {
+	.key	= "\x68\x7b\x8d\x8e\xe3\xc4\xdd\xae\xdf\x72\x7f\x53\x72\x25\x1e\x78\x91\xcb\x69\x76\x1f\x49\x93\xf9\x6f\x21\xcc\x39\x9c\xad\xb1\x1",
+	.nonce	= "\xdf\x51\x84\x82\x42\xc\x75\x9c",
+	.assoc	= "\x70\xd3\x33\xf3\x8b\x18\xb",
+	.alen	= 7,
+	.input	= "\x8b\x6\xd3\x31\xb0\x93\x45\xb1\x75\x6e\x26\xf9\x67\xbc\x90\x15\x81\x2c\xb5\xf0\xc6\x2b\xc7\x8c\x56\xd1\xbf\x69\x6c\x7\xa0\xda\x65\x27\xc9\x90\x3d\xef\x4b\x11\xf\x19\x7\xfd\x29\x92\xd9\xc8\xf7\x99\x2e\x4a\xd0\xb8\x2c\xdc\x93\xf5\x9e\x33\x78\xd1\x37\xc3\x66\xd7\x5e\xbc\x44\xbf\x53\xa5\xbc\xc4\xcb\x7b\x3a\x8e\x7f\x2\xbd\xbb\xe7\xca\xa6\x6c\x6b\x93\x21\x93\x10\x61\xe7\x69\xd0\x78\xf3\x7\x5a\x1a\x8f\x73\xaa\xb1\x4e\xd3\xda\x4f\xf3\x32\xe1\x66\x3e\x6c\xc6\x13\xba\x6\x5b\xfc\x6a\xe5\x6f\x60\xfb\x7\x40\xb0\x8c\x9d\x84\x43\x6b\xc1\xf7\x8d\x8d\x31\xf7\x7a\x39\x4d\x8f\x9a\xeb",
+	.ilen	= 145,
+	.result	= "\x33\x2f\x94\xc1\xa4\xef\xcc\x2a\x5b\xa6\xe5\x8f\x1d\x40\xf0\x92\x3c\xd9\x24\x11\xa9\x71\xf9\x37\x14\x99\xfa\xbe\xe6\x80\xde\x50\xc9\x96\xd4\xb0\xec\x9e\x17\xec\xd2\x5e\x72\x99\xfc\xa\xe1\xcb\x48\xd2\x85\xdd\x2f\x90\xe0\x66\x3b\xe6\x20\x74\xbe\x23\x8f\xcb\xb4\xe4\xda\x48\x40\xa6\xd1\x1b\xc7\x42\xce\x2f\xc\xa6\x85\x6e\x87\x37\x3\xb1\x7c\x25\x96\xa3\x5\xd8\xb0\xf4\xed\xea\xc2\xf0\x31\x98\x6c\xd1\x14\x25\xc0\xcb\x1\x74\xd0\x82\xf4\x36\xf5\x41\xd5\xdc\xca\xc5\xbb\x98\xfe\xfc\x69\x21\x70\xd8\xa4\x4b\xc8\xde\x8f"
+}, {
+	.key	= "\x8d\xb8\x91\x48\xf0\xe7\xa\xbd\xf9\x3f\xcd\xd9\xa0\x1e\x42\x4c\xe7\xde\x25\x3d\xa3\xd7\x5\x80\x8d\xf2\x82\xac\x44\x16\x51\x1",
+	.nonce	= "\xde\x7b\xef\xc3\x65\x1b\x68\xb0",
+	.assoc	= "",
+	.alen	= 0,
+	.input	= "\x85\x4\xc2\xed\x8d\xfd\x97\x5c\xd2\xb7\xe2\xc1\x6b\xa3\xba\xf8\xc9\x50\xc3\xc6\xa5\xe3\xa4\x7c\xc3\x23\x49\x5e\xa9\xb9\x32\xeb\x8a\x7c\xca\xe5\xec\xfb\x7c\xc0\xcb\x7d\xdc\x2c\x9d\x92\x55\x21\xa\xc8\x43\x63\x59\xa\x31\x70\x82\x67\x41\x3\xf8\xdf\xf2\xac\xa7\x2\xd4\xd5\x8a\x2d\xc8\x99\x19\x66\xd0\xf6\x88\x2c\x77\xd9\xd4\xd\x6c\xbd\x98\xde\xe7\x7f\xad\x7e\x8a\xfb\xe9\x4b\xe5\xf7\xe5\x50\xa0\x90\x3f\xd6\x22\x53\xe3\xfe\x1b\xcc\x79\x3b\xec\x12\x47\x52\xa7\xd6\x4\xe3\x52\xe6\x93\x90\x91\x32\x73\x79\xb8\xd0\x31\xde\x1f\x9f\x2f\x5\x38\x54\x2f\x35\x4\x39\xe0\xa7\xba\xc6\x52\xf6\x37\x65\x4c\x7\xa9\x7e\xb3\x21\x6f\x74\x8c\xc9\xde\xdb\x65\x1b\x9b\xaa\x60\xb1\x3\x30\x6b\xb2\x3\xc4\x1c\x4\xf8\xf\x64\xaf\x46\xe4\x65\x99\x49\xe2\xea\xce\x78\x0\xd8\x8b\xd5\x2e\xcf\xfc\x40\x49\xe8\x58\xdc\x34\x9c\x8c\x61\xbf\xa\x8e\xec\x39\xa9\x30\x5\x5a\xd2\x56\x1\xc7\xda\x8f\x4e\xbb\x43\xa3\x3a\xf9\x15\x2a\xd0\xa0\x7a\x87\x34\x82\xfe\x8a\xd1\x2d\x5e\xc7\xbf\x4\x53\x5f\x3b\x36\xd4\x25\x5c\x34\x7a\x8d\xd5\x5\xce\x72\xca\xef\x7a\x4b\xbc\xb0\x10\x5c\x96\x42\x3a\x0\x98\xcd\x15\xe8\xb7\x53",
+	.ilen	= 272,
+	.result	= "\x9b\x18\xdb\xdd\x9a\xf\x3e\xa5\x15\x17\xde\xdf\x8\x9d\x65\xa\x67\x30\x12\xe2\x34\x77\x4b\xc1\xd9\xc6\x1f\xab\xc6\x18\x50\x17\xa7\x9d\x3c\xa6\xc5\x35\x8c\x1c\xc0\xa1\x7c\x9f\x3\x89\xca\xe1\xe6\xe9\xd4\xd3\x88\xdb\xb4\x51\x9d\xec\xb4\xfc\x52\xee\x6d\xf1\x75\x42\xc6\xfd\xbd\x7a\x8e\x86\xfc\x44\xb3\x4f\xf3\xea\x67\x5a\x41\x13\xba\xb0\xdc\xe1\xd3\x2a\x7c\x22\xb3\xca\xac\x6a\x37\x98\x3e\x1d\x40\x97\xf7\x9b\x1d\x36\x6b\xb3\x28\xbd\x60\x82\x47\x34\xaa\x2f\x7d\xe9\xa8\x70\x81\x57\xd4\xb9\x77\xa\x9d\x29\xa7\x84\x52\x4f\xc2\x4a\x40\x3b\x3c\xd4\xc9\x2a\xdb\x4a\x53\xc4\xbe\x80\xe9\x51\x7f\x8f\xc7\xa2\xce\x82\x5c\x91\x1e\x74\xd9\xd0\xbd\xd5\xf3\xfd\xda\x4d\x25\xb4\xbb\x2d\xac\x2f\x3d\x71\x85\x7b\xcf\x3c\x7b\x3e\xe\x22\x78\xc\x29\xbf\xe4\xf4\x57\xb3\xcb\x49\xa0\xfc\x1e\x5\x4e\x16\xbc\xd5\xa8\xa3\xee\x5\x35\xc6\x7c\xab\x60\x14\x55\x1a\x8e\xc5\x88\x5d\xd5\x81\xc2\x81\xa5\xc4\x60\xdb\xaf\x77\x91\xe1\xce\xa2\x7e\x7f\x42\xe3\xb0\x13\x1c\x1f\x25\x60\x21\xe2\x40\x5f\x99\xb7\x73\xec\x9b\x2b\xf0\x65\x11\xc8\xd0\xa\x9f\xd3"
+}, {
+	.key	= "\xf2\xaa\x4f\x99\xfd\x3e\xa8\x53\xc1\x44\xe9\x81\x18\xdc\xf5\xf0\x3e\x44\x15\x59\xe0\xc5\x44\x86\xc3\x91\xa8\x75\xc0\x12\x46\xba",
+	.nonce	= "\xe\xd\x57\xbb\x7b\x40\x54\x2",
+	.assoc	= "",
+	.alen	= 0,
+	.input	= "\x14\xf6\x41\x37\xa6\xd4\x27\xcd\xdb\x6\x3e\x9a\x4e\xab\xd5\xb1\x1e\x6b\xd2\xbc\x11\xf4\x28\x93\x63\x54\xef\xbb\x5e\x1d\x3a\x1d\x37\x3c\xa\x6c\x1e\xc2\xd1\x2c\xb5\xa3\xb5\x7b\xb8\x8f\x25\xa6\x1b\x61\x1c\xec\x28\x58\x26\xa4\xa8\x33\x28\x25\x5c\x45\x5\xe5\x6c\x99\xe5\x45\xc4\xa2\x3\x84\x3\x73\x1e\x8c\x49\xac\x20\xdd\x8d\xb3\xc4\xf5\xe7\x4f\xf1\xed\xa1\x98\xde\xa4\x96\xdd\x2f\xab\xab\x97\xcf\x3e\xd2\x9e\xb8\x13\x7\x28\x29\x19\xaf\xfd\xf2\x49\x43\xea\x49\x26\x91\xc1\x7\xd6\xbb\x81\x75\x35\xd\x24\x7f\xc8\xda\xd4\xb7\xeb\xe8\x5c\x9\xa2\x2f\xdc\x28\x7d\x3a\x3\xfa\x94\xb5\x1d\x17\x99\x36\xc3\x1c\x18\x34\xe3\x9f\xf5\x55\x7c\xb0\x60\x9d\xff\xac\xd4\x61\xf2\xad\xf8\xce\xc7\xbe\x5c\xd2\x95\xa8\x4b\x77\x13\x19\x59\x26\xc9\xb7\x8f\x6a\xcb\x2d\x37\x91\xea\x92\x9c\x94\x5b\xda\xb\xce\xfe\x30\x20\xf8\x51\xad\xf2\xbe\xe7\xc7\xff\xb3\x33\x91\x6a\xc9\x1a\x41\xc9\xf\xf3\x10\xe\xfd\x53\xff\x6c\x16\x52\xd9\xf3\xf7\x98\x2e\xc9\x7\x31\x2c\xc\x72\xd7\xc5\xc6\x8\x2a\x7b\xda\xbd\x7e\x2\xea\x1a\xbb\xf2\x4\x27\x61\x28\x8e\xf5\x4\x3\x1f\x4c\x7\x55\x82\xec\x1e\xd7\x8b\x2f\x65\x56\xd1\xd9\x1e\x3c\xe9\x1f\x5e\x98\x70\x38\x4a\x8c\x49\xc5\x43\xa0\xa1\x8b\x74\x9d\x4c\x62\xd\x10\xc\xf4\x6c\x8f\xe0\xaa\x9a\x8d\xb7\xe0\xbe\x4c\x87\xf1\x98\x2f\xcc\xed\xc0\x52\x29\xdc\x83\xf8\xfc\x2c\xe\xa8\x51\x4d\x80\xd\xa3\xfe\xd8\x37\xe7\x41\x24\xfc\xfb\x75\xe3\x71\x7b\x57\x45\xf5\x97\x73\x65\x63\x14\x74\xb8\x82\x9f\xf8\x60\x2f\x8a\xf2\x4e\xf1\x39\xda\x33\x91\xf8\x36\xe0\x8d\x3f\x1f\x3b\x56\xdc\xa0\x8f\x3c\x9d\x71\x52\xa7\xb8\xc0\xa5\xc6\xa2\x73\xda\xf4\x4b\x74\x5b\x0\x3d\x99\xd7\x96\xba\xe6\xe1\xa6\x96\x38\xad\xb3\xc0\xd2\xba\x91\x6b\xf9\x19\xdd\x3b\xbe\xbe\x9c\x20\x50\xba\xa1\xd0\xce\x11\xbd\x95\xd8\xd1\xdd\x33\x85\x74\xdc\xdb\x66\x76\x44\xdc\x3\x74\x48\x35\x98\xb1\x18\x47\x94\x7d\xff\x62\xe4\x58\x78\xab\xed\x95\x36\xd9\x84\x91\x82\x64\x41\xbb\x58\xe6\x1c\x20\x6d\x15\x6b\x13\x96\xe8\x35\x7f\xdc\x40\x2c\xe9\xbc\x8a\x4f\x92\xec\x6\x2d\x50\xdf\x93\x5d\x65\x5a\xa8\xfc\x20\x50\x14\xa9\x8a\x7e\x1d\x8\x1f\xe2\x99\xd0\xbe\xfb\x3a\x21\x9d\xad\x86\x54\xfd\xd\x98\x1c\x5a\x6f\x1f\x9a\x40\xcd\xa2\xff\x6a\xf1\x54",
+	.ilen	= 528,
+	.result	= "\xc3\x9\x94\x62\xe6\x46\x2e\x10\xbe\x0\xe4\xfc\xf3\x40\xa3\xe2\xf\xc2\x8b\x28\xdc\xba\xb4\x3c\xe4\x21\x58\x61\xcd\x8b\xcd\xfb\xac\x94\xa1\x45\xf5\x1c\xe1\x12\xe0\x3b\x67\x21\x54\x5e\x8c\xaa\xcf\xdb\xb4\x51\xd4\x13\xda\xe6\x83\x89\xb6\x92\xe9\x21\x76\xa4\x93\x7d\xe\xfd\x96\x36\x3\x91\x43\x5c\x92\x49\x62\x61\x7b\xeb\x43\x89\xb8\x12\x20\x43\xd4\x47\x6\x84\xee\x47\xe9\x8a\x73\x15\xf\x72\xcf\xed\xce\x96\xb2\x7f\x21\x45\x76\xeb\x26\x28\x83\x6a\xad\xaa\xa6\x81\xd8\x55\xb1\xa3\x85\xb3\xc\xdf\xf1\x69\x2d\x97\x5\x2a\xbc\x7c\x7b\x25\xf8\x80\x9d\x39\x25\xf3\x62\xf0\x66\x5e\xf4\xa0\xcf\xd8\xfd\x4f\xb1\x1f\x60\x3a\x8\x47\xaf\xe1\xf6\x10\x77\x9\xa7\x27\x8f\x9a\x97\x5a\x26\xfa\xfe\x41\x32\x83\x10\xe0\x1d\xbf\x64\xd\xf4\x1c\x32\x35\xe5\x1b\x36\xef\xd4\x4a\x93\x4d\x0\x7c\xec\x2\x7\x8b\x5d\x7d\x1b\xe\xd1\xa6\xa5\x5d\x7d\x57\x88\xa8\xcc\x81\xb4\x86\x4e\xb4\x40\xe9\x1d\xc3\xb1\x24\x3e\x7f\xcc\x8a\x24\x9b\xdf\x6d\xf0\x39\x69\x3e\x4c\xc0\x96\xe4\x13\xda\x90\xda\xf4\x95\x66\x8b\x17\x17\xfe\x39\x43\x25\xaa\xda\xa0\x43\x3c\xb1\x41\x2\xa3\xf0\xa7\x19\x59\xbc\x1d\x7d\x6c\x6d\x91\x9\x5c\xb7\x5b\x1\xd1\x6f\x17\x21\x97\xbf\x89\x71\xa5\xb0\x6e\x7\x45\xfd\x9d\xea\x7\xf6\x7a\x9f\x10\x18\x22\x30\x73\xac\xd4\x6b\x72\x44\xed\xd9\x19\x9b\x2d\x4a\x41\xdd\xd1\x85\x5e\x37\x19\xed\xd2\x15\x8f\x5e\x91\xdb\x33\xf2\xe4\xdb\xff\x98\xfb\xa3\xb5\xca\x21\x69\x8\xe7\x8a\xdf\x90\xff\x3e\xe9\x20\x86\x3c\xe9\xfc\xb\xfe\x5c\x61\xaa\x13\x92\x7f\x7b\xec\xe0\x6d\xa8\x23\x22\xf6\x6b\x77\xc4\xfe\x40\x7\x3b\xb6\xf6\x8e\x5f\xd4\xb9\xb7\xf\x21\x4\xef\x83\x63\x91\x69\x40\xa3\x48\x5c\xd2\x60\xf9\x4f\x6c\x47\x8b\x3b\xb1\x9f\x8e\xee\x16\x8a\x13\xfc\x46\x17\xc3\xc3\x32\x56\xf8\x3c\x85\x3a\xb6\x3e\xaa\x89\x4f\xb3\xdf\x38\xfd\xf1\xe4\x3a\xc0\xe6\x58\xb5\x8f\xc5\x29\xa2\x92\x4a\xb6\xa0\x34\x7f\xab\xb5\x8a\x90\xa1\xdb\x4d\xca\xb6\x2c\x41\x3c\xf7\x2b\x21\xc3\xfd\xf4\x17\x5c\xb5\x33\x17\x68\x2b\x8\x30\xf3\xf7\x30\x3c\x96\xe6\x6a\x20\x97\xe7\x4d\x10\x5f\x47\x5f\x49\x96\x9\xf0\x27\x91\xc8\xf8\x5a\x2e\x79\xb5\xe2\xb8\xe8\xb9\x7b\xd5\x10\xcb\xff\x5d\x14\x73\xf3"
+}, {
+	.key	= "\xea\xbc\x56\x99\xe3\x50\xff\xc5\xcc\x1a\xd7\xc1\x57\x72\xea\x86\x5b\x89\x88\x61\x3d\x2f\x9b\xb2\xe7\x9c\xec\x74\x6e\x3e\xf4\x3b",
+	.nonce	= "\xef\x2d\x63\xee\x6b\x80\x8b\x78",
+	.assoc	= "\x5a\x27\xff\xeb\xdf\x84\xb2\x9e\xef",
+	.alen	= 9,
+	.input	= "\xfd\x81\x8d\xd0\x3d\xb4\xd5\xdf\xd3\x42\x47\x5a\x6d\x19\x27\x66\x4b\x2e\xc\x27\x9c\x96\x4c\x72\x2\xa3\x65\xc3\xb3\x6f\x2e\xbd\x63\x8a\x4a\x5d\x29\xa2\xd0\x28\x48\xc5\x3d\x98\xa3\xbc\xe0\xbe\x3b\x3f\xe6\x8a\xa4\x7f\x53\x6\xfa\x7f\x27\x76\x72\x31\xa1\xf5\xd6\xc\x52\x47\xba\xcd\x4f\xd7\xeb\x5\x48\xd\x7c\x35\x4a\x9\xc9\x76\x71\x2\xa3\xfb\xb7\x1a\x65\xb7\xed\x98\xc6\x30\x8a\x0\xae\xa1\x31\xe5\xb5\x9e\x6d\x62\xda\xda\x7\xf\x38\x38\xd3\xcb\xc1\xb0\xad\xec\x72\xec\xb1\xa2\x7b\x59\xf3\x3d\x2b\xef\xcd\x28\x5b\x83\xcc\x18\x91\x88\xb0\x2e\xf9\x29\x31\x18\xf9\x4e\xe9\xa\x91\x92\x9f\xae\x2d\xad\xf4\xe6\x1a\xe2\xa4\xee\x47\x15\xbf\x83\x6e\xd7\x72\x12\x3b\x2d\x24\xe9\xb2\x55\xcb\x3c\x10\xf0\x24\x8a\x4a\x2\xea\x90\x25\xf0\xb4\x79\x3a\xef\x6e\xf5\x52\xdf\xb0\xa\xcd\x24\x1c\xd3\x2e\x22\x74\xea\x21\x6f\xe9\xbd\xc8\x3e\x36\x5b\x19\xf1\xca\x99\xa\xb4\xa7\x52\x1a\x4e\xf2\xad\x8d\x56\x85\xbb\x64\x89\xba\x26\xf9\xc7\xe1\x89\x19\x22\x77\xc3\xa8\xfc\xff\xad\xfe\xb9\x48\xae\x12\x30\x9f\x19\xfb\x1b\xef\x14\x87\x8a\x78\x71\xf3\xf4\xb7\x0\x9c\x1d\xb5\x3d\x49\x0\xc\x6\xd4\x50\xf9\x54\x45\xb2\x5b\x43\xdb\x6d\xcf\x1a\xe9\x7a\x7a\xcf\xfc\x8a\x4e\x4d\xb\x7\x63\x28\xd8\xe7\x8\x95\xdf\xa6\x72\x93\x2e\xbb\xa0\x42\x89\x16\xf1\xd9\xc\xf9\xa1\x16\xfd\xd9\x3\xb4\x3b\x8a\xf5\xf6\xe7\x6b\x2e\x8e\x4c\x3d\xe2\xaf\x8\x45\x3\xff\x9\xb6\xeb\x2d\xc6\x1b\x88\x94\xac\x3e\xf1\x9f\xe\xe\x2b\xd5\x0\x4d\x3f\x3b\x53\xae\xaf\x1c\x33\x5f\x55\x6e\x8d\xaf\x5\x7a\x10\x34\xc9\xf4\x66\xcb\x62\x12\xa6\xee\xe8\x1c\x5d\x12\x86\xdb\x6f\x1c\x33\xc4\x1c\xda\x82\x2d\x3b\x59\xfe\xb1\xa4\x59\x41\x86\xd0\xef\xae\xfb\xda\x6d\x11\xb8\xca\xe9\x6e\xff\xf7\xa9\xd9\x70\x30\xfc\x53\xe2\xd7\xa2\x4e\xc7\x91\xd9\x7\x6\xaa\xdd\xb0\x59\x28\x1d\x0\x66\xc5\x54\xc2\xfc\x6\xda\x5\x90\x52\x1d\x37\x66\xee\xf0\xb2\x55\x8a\x5d\xd2\x38\x86\x94\x9b\xfc\x10\x4c\xa1\xb9\x64\x3e\x44\xb8\x5f\xb0\xc\xec\xe0\xc9\xe5\x62\x75\x3f\x9\xd5\xf5\xd9\x26\xba\x9e\xd2\xf4\xb9\x48\xa\xbc\xa2\xd6\x7c\x36\x11\x7d\x26\x81\x89\xcf\xa4\xad\x73\xe\xee\xcc\x6\xa9\xdb\xb1\xfd\xfb\x9\x7f\x90\x42\x37\x2f\xe1\x9c\xf\x6f\xcf\x43\xb5\xd9\x90\xe1\x85\xf5\xa8\xae",
+	.ilen	= 529,
+	.result	= "\xe6\xc3\xdb\x63\x55\x15\xe3\x5b\xb7\x4b\x27\x8b\x5a\xdd\xc2\xe8\x3a\x6b\xd7\x81\x96\x35\x97\xca\xd7\x68\xe8\xef\xce\xab\xda\x9\x6e\xd6\x8e\xcb\x55\xb5\xe1\xe5\x57\xfd\xc4\xe3\xe0\x18\x4f\x85\xf5\x3f\x7e\x4b\x88\xc9\x52\x44\xf\xea\xaf\x1f\x71\x48\x9f\x97\x6d\xb9\x6f\x0\xa6\xde\x2b\x77\x8b\x15\xad\x10\xa0\x2b\x7b\x41\x90\x3\x2d\x69\xae\xcc\x77\x7c\xa5\x9d\x29\x22\xc2\xea\xb4\x0\x1a\xd2\x7a\x98\x8a\xf9\xf7\x82\xb0\xab\xd8\xa6\x94\x8d\x58\x2f\x1\x9e\x0\x20\xfc\x49\xdc\xe\x3\xe8\x45\x10\xd6\xa8\xda\x55\x10\x9a\xdf\x67\x22\x8b\x43\xab\x0\xbb\x2\xc8\xdd\x7b\x97\x17\xd7\x1d\x9e\x2\x5e\x48\xde\x8e\xcf\x99\x7\x95\x92\x3c\x5f\x9f\xc5\x8a\xc0\x23\xaa\xd5\x8c\x82\x6e\x16\x92\xb1\x12\x17\x7\xc3\xfb\x36\xf5\x6c\x35\xd6\x6\x1f\x9f\xa7\x94\xa2\x38\x63\x9c\xb0\x71\xb3\xa5\xd2\xd8\xba\x9f\x8\x1\xb3\xff\x4\x97\x73\x45\x1b\xd5\xa9\x9c\x80\xaf\x4\x9a\x85\xdb\x32\x5b\x5d\x1a\xc1\x36\x28\x10\x79\xf1\x3c\xbf\x1a\x41\x5c\x4e\xdf\xb2\x7c\x79\x3b\x7a\x62\x3d\x4b\xc9\x9b\x2a\x2e\x7c\xa2\xb1\x11\x98\xa7\x34\x1a\x0\xf3\xd1\xbc\x18\x22\xba\x2\x56\x62\x31\x10\x11\x6d\xe0\x54\x9d\x40\x1f\x26\x80\x41\xca\x3f\x68\xf\x32\x1d\xa\x8e\x79\xd8\xa4\x1b\x29\x1c\x90\x8e\xc5\xe3\xb4\x91\x37\x9a\x97\x86\x99\xd5\x9\xc5\xbb\xa3\x3f\x21\x29\x82\x14\x5c\xab\x25\xfb\xf2\x4f\x58\x26\xd4\x83\xaa\x66\x89\x67\x7e\xc0\x49\xe1\x11\x10\x7f\x7a\xda\x29\x4\xff\xf0\xcb\x9\x7c\x9d\xfa\x3\x6f\x81\x9\x31\x60\xfb\x8\xfa\x74\xd3\x64\x44\x7c\x55\x85\xec\x9c\x6e\x25\xb7\x6c\xc5\x37\xb6\x83\x87\x72\x95\x8b\x9d\xe1\x69\x5c\x31\x95\x42\xa6\x2c\xd1\x36\x47\x1f\xec\x54\xab\xa2\x1c\xd8\x0\xcc\xbc\xd\x65\xe2\x67\xbf\xbc\xea\xee\x9e\xe4\x36\x95\xbe\x73\xd9\xa6\xd9\xf\xa0\xcc\x82\x76\x26\xad\x5b\x58\x6c\x4e\xab\x29\x64\xd3\xd9\xa9\x8\x8c\x1d\xa1\x4f\x80\xd8\x3f\x94\xfb\xd3\x7b\xfc\xd1\x2b\xc3\x21\xeb\xe5\x1c\x84\x23\x7f\x4b\xfa\xdb\x34\x18\xa2\xc2\xe5\x13\xfe\x6c\x49\x81\xd2\x73\xe7\xe2\xd7\xe4\x4f\x4b\x8\x6e\xb1\x12\x22\x10\x9d\xac\x51\x1e\x17\xd9\x8a\xb\x42\x88\x16\x81\x37\x7c\x6a\xf7\xef\x2d\xe3\xd9\xf8\x5f\xe0\x53\x27\x74\xb9\xe2\xd6\x1c\x80\x2c\x52\x65"
+}, {
+	.key	= "\x47\x11\xeb\x86\x2b\x2c\xab\x44\x34\xda\x7f\x57\x3\x39\xc\xaf\x2c\x14\xfd\x65\x23\xe9\x8e\x74\xd5\x8\x68\x8\xe7\xb4\x72\xd7",
+	.nonce	= "\xdb\x92\xf\x7f\x17\x54\xc\x30",
+	.assoc	= "\xd2\xa1\x70\xdb\x7a\xf8\xfa\x27\xba\x73\xf\xbf\x3d\x1e\x82\xb2",
+	.alen	= 16,
+	.input	= "\xe5\x26\xa4\x3d\xbd\x33\xd0\x4b\x6f\x5\xa7\x6e\x12\x7a\xd2\x74\xa6\xdd\xbd\x95\xeb\xf9\xa4\xf1\x59\x93\x91\x70\xd9\xfe\x9a\xcd\x53\x1f\x3a\xab\xa6\x7c\x9f\xa6\x9e\xbd\x99\xd9\xb5\x97\x44\xd5\x14\x48\x4d\x9d\xc0\xd0\x5\x96\xeb\x4c\x78\x55\x9\x8\x1\x2\x30\x90\x7b\x96\x7a\x7b\x5f\x30\x41\x24\xce\x68\x61\x49\x86\x57\x82\xdd\x53\x1c\x51\x28\x2b\x53\x6e\x2d\xc2\x20\x4c\xdd\x8f\x65\x10\x20\x50\xdd\x9d\x50\xe5\x71\x40\x53\x69\xfc\x77\x48\x11\xb9\xde\xa4\x8d\x58\xe4\xa6\x1a\x18\x47\x81\x7e\xfc\xdd\xf6\xef\xce\x2f\x43\x68\xd6\x6\xe2\x74\x6a\xad\x90\xf5\x37\xf3\x3d\x82\x69\x40\xe9\x6b\xa7\x3d\xa8\x1e\xd2\x2\x7c\xb7\x9b\xe4\xda\x8f\x95\x6\xc5\xdf\x73\xa3\x20\x9a\x49\xde\x9c\xbc\xee\x14\x3f\x81\x5e\xf8\x3b\x59\x3c\xe1\x68\x12\x5a\x3a\x76\x3a\x3f\xf7\x87\x33\xa\x1\xb8\xd4\xed\xb6\xbe\x94\x5e\x70\x40\x56\x67\x1f\x50\x44\x19\xce\x82\x70\x10\x87\x13\x20\xb\x4c\x5a\xb6\xf6\xa7\xae\x81\x75\x1\x81\xe6\x4b\x57\x7c\xdd\x6d\xf8\x1c\x29\x32\xf7\xda\x3c\x2d\xf8\x9b\x25\x6e\x0\xb4\xf7\x2f\xf7\x4\xf7\xa1\x56\xac\x4f\x1a\x64\xb8\x47\x55\x18\x7b\x7\x4d\xbd\x47\x24\x80\x5d\xa2\x70\xc5\xdd\x8e\x82\xd4\xeb\xec\xb2\xc\x39\xd2\x97\xc1\xcb\xeb\xf4\x77\x59\xb4\x87\xef\xcb\x43\x2d\x46\x54\xd1\xa7\xd7\x15\x99\xa\x43\xa1\xe0\x99\x33\x71\xc1\xed\xfe\x72\x46\x33\x8e\x91\x8\x9f\xc8\x2e\xca\xfa\xdc\x59\xd5\xc3\x76\x84\x9f\xa3\x37\x68\xc3\xf0\x47\x2c\x68\xdb\x5e\xc3\x49\x4c\xe8\x92\x85\xe2\x23\xd3\x3f\xad\x32\xe5\x2b\x82\xd7\x8f\x99\xa\x59\x5c\x45\xd9\xb4\x51\x52\xc2\xae\xbf\x80\xcf\xc9\xc9\x51\x24\x2a\x3b\x3a\x4d\xae\xeb\xbd\x22\xc3\xe\xf\x59\x25\x92\x17\xe9\x74\xc7\x8b\x70\x70\x36\x55\x95\x75\x4b\xad\x61\x2b\x9\xbc\x82\xf2\x6e\x94\x43\xae\xc3\xd5\xcd\x8e\xfe\x5b\x9a\x88\x43\x1\x75\xb2\x23\x9\xf7\x89\x83\xe7\xfa\xf9\xb4\x9b\xf8\xef\xbd\x1c\x92\xc1\xda\x7e\xfe\x5\xba\x5a\xcd\x7\x6a\x78\x9e\x5d\xfb\x11\x2f\x79\x38\xb6\xc2\x5b\x6b\x51\xb4\x71\xdd\xf7\x2a\xe4\xf4\x72\x76\xad\xc2\xdd\x64\x5d\x79\xb6\xf5\x7a\x77\x20\x5\x3d\x30\x6\xd4\x4c\xa\x2c\x98\x5a\xb9\xd4\x98\xa9\x3f\xc6\x12\xea\x3b\x4b\xc5\x79\x64\x63\x6b\x9\x54\x3b\x14\x27\xba\x99\x80\xc8\x72\xa8\x12\x90\x29\xba\x40\x54\x97\x2b\x7b\xfe\xeb\xcd\x1\x5\x44\x72\xdb\x99\xe4\x61\xc9\x69\xd6\xb9\x28\xd1\x5\x3e\xf9\xb\x49\xa\x49\xe9\x8d\xe\xa7\x4a\xf\xaf\x32\xd0\xe0\xb2\x3a\x55\x58\xfe\x5c\x28\x70\x51\x23\xb0\x7b\x6a\x5f\x1e\xb8\x17\xd7\x94\x15\x8f\xee\x20\xc7\x42\x25\x3e\x9a\x14\xd7\x60\x72\x39\x47\x48\xa9\xfe\xdd\x47\xa\xb1\xe6\x60\x28\x8c\x11\x68\xe1\xff\xd7\xce\xc8\xbe\xb3\xfe\x27\x30\x9\x70\xd7\xfa\x2\x33\x3a\x61\x2e\xc7\xff\xa4\x2a\xa8\x6e\xb4\x79\x35\x6d\x4c\x1e\x38\xf8\xee\xd4\x84\x4e\x6e\x28\xa7\xce\xc8\xc1\xcf\x80\x5\xf3\x4\xef\xc8\x18\x28\x2e\x8d\x5e\xc\xdf\xb8\x5f\x96\xe8\xc6\x9c\x2f\xe5\xa6\x44\xd7\xe7\x99\x44\xc\xec\xd7\x5\x60\x97\xbb\x74\x77\x58\xd5\xbb\x48\xde\x5a\xb2\x54\x7f\xe\x46\x70\x6a\x6f\x78\xa5\x8\x89\x5\x4e\x7e\xa0\x69\xb4\x40\x60\x55\x77\x75\x9b\x19\xf2\xd5\x13\x80\x77\xf9\x4b\x3f\x1e\xee\xe6\x76\x84\x7b\x8c\xe5\x27\xa8\xa\x91\x1\x68\x71\x8a\x3f\x6\xab\xf6\xa9\xa5\xe6\x72\x92\xe4\x67\xe2\xa2\x46\x35\x84\x55\x7d\xca\xa8\x85\xd0\xf1\x3f\xbe\xd7\x34\x64\xfc\xae\xe3\xe4\x4\x9f\x66\x2\xb9\x88\x10\xd9\xc4\x4c\x31\x43\x7a\x93\xe2\x9b\x56\x43\x84\xdc\xdc\xde\x1d\xa4\x2\xe\xc2\xef\xc3\xf8\x78\xd1\xb2\x6b\x63\x18\xc9\xa9\xe5\x72\xd8\xf3\xb9\xd1\x8a\xc7\x1a\x2\x27\x20\x77\x10\xe5\xc8\xd4\x4a\x47\xe5\xdf\x5f\x1\xaa\xb0\xd4\x10\xbb\x69\xe3\x36\xc8\xe1\x3d\x43\xfb\x86\xcd\xcc\xbf\xf4\x88\xe0\x20\xca\xb7\x1b\xf1\x2f\x5c\xee\xd4\xd3\xa3\xcc\xa4\x1e\x1c\x47\xfb\xbf\xfc\xa2\x41\x55\x9d\xf6\x5a\x5e\x65\x32\x34\x7b\x52\x8d\xd5\xd0\x20\x60\x3\xab\x3f\x8c\xd4\x21\xea\x2a\xd9\xc4\xd0\xd3\x65\xd8\x7a\x13\x28\x62\x32\x4b\x2c\x87\x93\xa8\xb4\x52\x45\x9\x44\xec\xec\xc3\x17\xdb\x9a\x4d\x5c\xa9\x11\xd4\x7d\xaf\x9e\xf1\x2d\xb2\x66\xc5\x1d\xed\xb7\xcd\xb\x25\x5e\x30\x47\x3f\x40\xf4\xa1\xa0\x0\x94\x10\xc5\x6a\x63\x1a\xd5\x88\x92\x8e\x82\x39\x87\x3c\x78\x65\x58\x42\x75\x5b\xdd\x77\x3e\x9\x4e\x76\x5b\xe6\xe\x4d\x38\xb2\xc0\xb8\x95\x1\x7a\x10\xe0\xfb\x7\xf2\xab\x2d\x8c\x32\xed\x2b\xc0\x46\xc2\xf5\x38\x83\xf0\x17\xec\xc1\x20\x6a\x9a\xb\x0\xa0\x98\x22\x50\x23\xd5\x80\x6b\xf6\x1f\xc3\xcc\x97\xc9\x24\x9f\xf3\xaf\x43\x14\xd5\xa0",
+	.ilen	= 1040,
+	.result	= "\x42\x93\xe4\xeb\x97\xb0\x57\xbf\x1a\x8b\x1f\xe4\x5f\x36\x20\x3c\xef\xa\xa9\x48\x5f\x5f\x37\x22\x3a\xde\xe3\xae\xbe\xad\x7\xcc\xb1\xf6\xf5\xf9\x56\xdd\xe7\x16\x1e\x7f\xdf\x7a\x9e\x75\xb7\xc7\xbe\xbe\x8a\x36\x4\xc0\x10\xf4\x95\x20\x3\xec\xdc\x5\xa1\x7d\xc4\xa9\x2c\x82\xd0\xbc\x8b\xc5\xc7\x45\x50\xf6\xa2\x1a\xb5\x46\x3b\x73\x2\xa6\x83\x4b\x73\x82\x58\x5e\x3b\x65\x2f\xe\xfd\x2b\x59\x16\xce\xa1\x60\x9c\xe8\x3a\x99\xed\x8d\x5a\xcf\xf6\x83\xaf\xba\xd7\x73\x73\x40\x97\x3d\xca\xef\x7\x57\xe6\xd9\x70\xe\x95\xae\xa6\x8d\x4\xcc\xee\xf7\x9\x31\x77\x12\xa3\x23\x97\x62\xb3\x7b\x32\xfb\x80\x14\x48\x81\xc3\xe5\xea\x91\x39\x52\x81\xa2\x4f\xe4\xb3\x9\xff\xde\x5e\xe9\x58\x84\x6e\xf9\x3d\xdf\x25\xea\xad\xae\xe6\x9a\xd1\x89\x55\xd3\xde\x6c\x52\xdb\x70\xfe\x37\xce\x44\xa\xa8\x25\x5f\x92\xc1\x33\x4a\x4f\x9b\x62\x35\xff\xce\xc0\xa9\x60\xce\x52\x0\x97\x51\x35\x26\x2e\xb9\x36\xa9\x87\x6e\x1e\xcc\x91\x78\x53\x98\x86\x5b\x9c\x74\x7d\x88\x33\xe1\xdf\x37\x69\x2b\xbb\xf1\x4d\xf4\xd1\xf1\x39\x93\x17\x51\x19\xe3\x19\x1e\x76\x37\x25\xfb\x9\x27\x6a\xab\x67\x6f\x14\x12\x64\xe7\xc4\x7\xdf\x4d\x17\xbb\x6d\xe0\xe9\xb9\xab\xca\x10\x68\xaf\x7e\xb7\x33\x54\x73\x7\x6e\xf7\x81\x97\x9c\x5\x6f\x84\x5f\xd2\x42\xfb\x38\xcf\xd1\x2f\x14\x30\x88\x98\x4d\x5a\xa9\x76\xd5\x4f\x3e\x70\x6c\x85\x76\xd7\x1\xa0\x1a\xc8\x4e\xaa\xac\x78\xfe\x46\xde\x6a\x5\x46\xa7\x43\xc\xb9\xde\xb9\x68\xfb\xce\x42\x99\x7\x4d\xb\x3b\x5a\x30\x35\xa8\xf9\x3a\x73\xef\xf\xdb\x1e\x16\x42\xc4\xba\xae\x58\xaa\xf8\xe5\x75\x2f\x1b\x15\x5c\xfd\xa\x97\xd0\xe4\x37\x83\x61\x5f\x43\xa6\xc7\x3f\x38\x59\xe6\xeb\xa3\x90\xc3\xaa\xaa\x5a\xd3\x34\xd4\x17\xc8\x65\x3e\x57\xbc\x5e\xdd\x9e\xb7\xf0\x2e\x5b\xb2\x1f\x8a\x8\xd\x45\x91\xb\x29\x53\x4f\x4c\x5a\x73\x56\xfe\xaf\x41\x1\x39\xa\x24\x3c\x7e\xbe\x4e\x53\xf3\xeb\x6\x66\x51\x28\x1d\xbd\x41\xa\x1\xab\x16\x47\x27\x47\x47\xf7\xcb\x46\xa\x70\x9e\x1\x9c\x9\xe1\x2a\x0\x1a\xd8\xd4\x79\x9d\x80\x15\x8e\x53\x2a\x65\x83\x78\x3e\x3\x0\x7\x12\x1f\x33\x3e\x7b\x13\x37\xf1\xc3\xef\xb7\xc1\x20\x3c\x3e\x67\x66\x5d\x88\xa7\x7d\x33\x50\x77\xb0\x28\x8e\xe7\x2c\x2e\x7a\xf4\x3c\x8d\x74\x83\xaf\x8e\x87\xf\xe4\x50\xff\x84\x5c\x47\xc\x6a\x49\xbf\x42\x86\x77\x15\x48\xa5\x90\x5d\x93\xd6\x2a\x11\xd5\xd5\x11\xaa\xce\xe7\x6f\xa5\xb0\x9\x2c\x8d\xd3\x92\xf0\x5a\x2a\xda\x5b\x1e\xd5\x9a\xc4\xc4\xf3\x49\x74\x41\xca\xe8\xc1\xf8\x44\xd6\x3c\xae\x6c\x1d\x9a\x30\x4\x4d\x27\xe\xb1\x5f\x59\xa2\x24\xe8\xe1\x98\xc5\x6a\x4c\xfe\x41\xd2\x27\x42\x52\xe1\xe9\x7d\x62\xe4\x88\xf\xad\xb2\x70\xcb\x9d\x4c\x27\x2e\x76\x1e\x1a\x63\x65\xf5\x3b\xf8\x57\x69\xeb\x5b\x38\x26\x39\x33\x25\x45\x3e\x91\xb8\xd8\xc7\xd5\x42\xc0\x22\x31\x74\xf4\xbc\xc\x23\xf1\xca\xc1\x8d\xd7\xbe\xc9\x62\xe4\x8\x1a\xcf\x36\xd5\xfe\x55\x21\x59\x91\x87\x87\xdf\x6\xdb\xdf\x96\x45\x58\xda\x5\xcd\x50\x4d\xd2\x7d\x5\x18\x73\x6a\x8d\x11\x85\xa6\x88\xe8\xda\xe6\x30\x33\xa4\x89\x31\x75\xbe\x69\x43\x84\x43\x50\x87\xdd\x71\x36\x83\xc3\x78\x74\x24\xa\xed\x7b\xdb\xa4\x24\xb\xb9\x7e\x5d\xff\xde\xb1\xef\x61\x5a\x45\x33\xf6\x17\x7\x8\x98\x83\x92\xf\x23\x6d\xe6\xaa\x17\x54\xad\x6a\xc8\xdb\x26\xbe\xb8\xb6\x8\xfa\x68\xf1\xd7\x79\x6f\x18\xb4\x9e\x2d\x3f\x1b\x64\xaf\x8d\x6\xe\x49\x28\xe0\x5d\x45\x68\x13\x87\xfa\xde\x40\x7b\xd2\xc3\x94\xd5\xe1\xd9\xc2\xaf\x55\x89\xeb\xb4\x12\x59\xa8\xd4\xc5\x29\x66\x38\xe6\xac\x22\x22\xd9\x64\x9b\x34\xa\x32\x9f\xc2\xbf\x17\x6c\x3f\x71\x7a\x38\x6b\x98\xfb\x49\x36\x89\xc9\xe2\xd6\xc7\x5d\xd0\x69\x5f\x23\x35\xc9\x30\xe2\xfd\x44\x58\x39\xd7\x97\xfb\x5c\x0\xd5\x4f\x7a\x1a\x95\x8b\x62\x4b\xce\xe5\x91\x21\x7b\x30\x0\xd6\xdd\x6d\x2\x86\x49\xf\x3c\x1a\x27\x3c\xd3\xe\x71\xf2\xff\xf5\x2f\x87\xac\x67\x59\x81\xa3\xf7\xf8\xd6\x11\xc\x84\xa9\x3\xee\x2a\xc4\xf3\x22\xab\x7c\xe2\x25\xf5\x67\xa3\xe4\x11\xe0\x59\xb3\xca\x87\xa0\xae\xc9\xa6\x62\x1b\x6e\x4d\x2\x6b\x7\x9d\xfd\xd0\x92\x6\xe1\xb2\x9a\x4a\x1f\x1f\x13\x49\x99\x97\x8\xde\x7f\x98\xaf\x51\x98\xee\x2c\xcb\xf0\xb\xc6\xb6\xb7\x2d\x9a\xb1\xac\xa6\xe3\x15\x77\x9d\x6b\x1a\xe4\xfc\x8b\xf2\x17\x59\x8\x4\x58\x81\x9d\x1b\x1b\x69\x55\xc2\xb4\x3c\x1f\x50\xf1\x7f\x77\x90\x4c\x66\x40\x5a\xc0\x33\x1f\xcb\x5\x6d\x5c\x6\x87\x52\xa2\x8f\x26\xd5\x4f"
+}, {
+	.key	= "\x35\x4e\xb5\x70\x50\x42\x8a\x85\xf2\xfb\xed\x7b\xd0\x9e\x97\xca\xfa\x98\x66\x63\xee\x37\xcc\x52\xfe\xd1\xdf\x95\x15\x34\x29\x38",
+	.nonce	= "\xfd\x87\xd4\xd8\x62\xfd\xec\xaa",
+	.assoc	= "\xd6\x31\xda\x5d\x42\x5e\xd7",
+	.alen	= 7,
+	.input	= "\x6a\xfc\x4b\x25\xdf\xc0\xe4\xe8\x17\x4d\x4c\xc9\x7e\xde\x3a\xcc\x3c\xba\x6a\x77\x47\xdb\xe3\x74\x7a\x4d\x5f\x8d\x37\x55\x80\x73\x90\x66\x5d\x3a\x7d\x5d\x86\x5e\x8d\xfd\x83\xff\x4e\x74\x6f\xf9\xe6\x70\x17\x70\x3e\x96\xa7\x7e\xcb\xab\x8f\x58\x24\x9b\x1\xfd\xcb\xe6\x4d\x9b\xf0\x88\x94\x57\x66\xef\x72\x4c\x42\x6e\x16\x19\x15\xea\x70\x5b\xac\x13\xdb\x9f\x18\xe2\x3c\x26\x97\xbc\xdc\x45\x8c\x6c\x24\x69\x9c\xf7\x65\x1e\x18\x59\x31\x7c\xe4\x73\xbc\x39\x62\xc6\x5c\x9f\xbf\xfa\x90\x3\xc9\x72\x26\xb6\x1b\xc2\xb7\x3f\xf2\x13\x77\xf2\x8d\xb9\x47\xd0\x53\xdd\xc8\x91\x83\x8b\xb1\xce\xa3\xfe\xcd\xd9\xdd\x92\x7b\xdb\xb8\xfb\xc9\x2d\x1\x59\x39\x52\xad\x1b\xec\xcf\xd7\x70\x13\x21\xf5\x47\xaa\x18\x21\x5c\xc9\x9a\xd2\x6b\x5\x9c\x1\xa1\xda\x35\x5d\xb3\x70\xe6\xa9\x80\x8b\x91\xb7\xb3\x5f\x24\x9a\xb7\xd1\x6b\xa1\x1c\x50\xba\x49\xe0\xee\x2e\x75\xac\x69\xc0\xeb\x3\xdd\x19\xe5\xf6\x6\xdd\xc3\xd7\x2b\x7\x7\x30\xa7\x19\xc\xbf\xe6\x18\xcc\xb1\x1\x11\x85\x77\x1d\x96\xa7\xa3\x0\x84\x2\xa2\x83\x68\xda\x17\x27\xc8\x7f\x23\xb7\xf4\x13\x85\xcf\xdd\x7a\x7d\x24\x57\xfe\x5\x93\xf5\x74\xce\xed\xc\x20\x98\x8d\x92\x30\xa1\x29\x23\x1a\xa0\x4f\x69\x56\x4c\xe1\xc8\xce\xf6\x9a\xc\xa4\xfa\x4\xf6\x62\x95\xf2\xfa\xc7\x40\x68\x40\x8f\x41\xda\xb4\x26\x6f\x70\xab\x40\x61\xa4\xe\x75\xfb\x86\xeb\x9d\x9a\x1f\xec\x76\x99\xe7\xea\xaa\x1e\x2d\xb5\xd4\xa6\x1a\xb8\x61\xa\x1d\x16\x5b\x98\xc2\x31\x40\xe7\x23\x1d\x66\x99\xc8\xc0\xd7\xce\xf3\x57\x40\x4\x3f\xfc\xea\xb3\xfc\xd2\xd3\x99\xa4\x94\x69\xa0\xef\xd1\x85\xb3\xa6\xb1\x28\xbf\x94\x67\x22\xc3\x36\x46\xf8\xd2\xf\x5f\xf4\x59\x80\xe6\x2d\x43\x8\x7d\x19\x9\x97\xa7\x4c\x3d\x8d\xba\x65\x62\xa3\x71\x33\x29\x62\xdb\xc1\x33\x34\x1a\x63\x33\x16\xb6\x64\x7e\xab\x33\xf0\xe6\x26\x68\xba\x1d\x2e\x38\x8\xe6\x2\xd3\x25\x2c\x47\x23\x58\x34\xf\x9d\x63\x4f\x63\xbb\x7f\x3b\x34\x38\xa7\xb5\x8d\x65\xd9\x9f\x79\x55\x3e\x4d\xe7\x73\xd8\xf6\x98\x97\x84\x60\x9c\xc8\xa9\x3c\xf6\xdc\x12\x5c\xe1\xbb\xb\x8b\x98\x9c\x9d\x26\x7c\x4a\xe6\x46\x36\x58\x21\x4a\xee\xca\xd7\x3b\xc2\x6c\x49\x2f\xe5\xd5\x3\x59\x84\x53\xcb\xfe\x92\x71\x2e\x7c\x21\xcc\x99\x85\x7f\xb8\x74\x90\x13\x42\x3f\xe0\x6b\x1d\xf2\x4d\x54\xd4\xfc\x3a\x5\xe6\x74\xaf\xa6\xa0\x2a\x20\x23\x5d\x34\x5c\xd9\x3e\x4e\xfa\x93\xe7\xaa\xe9\x6f\x8\x43\x67\x41\xc5\xad\xfb\x31\x95\x82\x73\x32\xd8\xa6\xa3\xed\xe\x2d\xf6\x5f\xfd\x80\xa6\x7a\xe0\xdf\x78\x15\x29\x74\x33\xd0\x9e\x83\x86\x72\x22\x57\x29\xb9\x9e\x5d\xd3\x1a\xb5\x96\x72\x41\x3d\xf1\x64\x43\x67\xee\xaa\x5c\xd3\x9a\x96\x13\x11\x5d\xf3\xc\x87\x82\x1e\x41\x9e\xd0\x27\xd7\x54\x3b\x67\x73\x9\x91\xe9\xd5\x36\xa7\xb5\x55\xe4\xf3\x21\x51\x49\x22\x7\x55\x4f\x44\x4b\xd2\x15\x93\x17\x2a\xfa\x4d\x4a\x57\xdb\x4c\xa6\xeb\xec\x53\x25\x6c\x21\xed\x0\x4c\x3b\xca\x14\x57\xa9\xd6\x6a\xcd\x8d\x5e\x74\xac\x72\xc1\x97\xe5\x1b\x45\x4e\xda\xfc\xcc\x40\xe8\x48\x88\xb\xa3\xe3\x8d\x83\x42\xc3\x23\xfd\x68\xb5\x8e\xf1\x9d\x63\x77\xe9\xa3\x8e\x8c\x26\x6b\xbd\x72\x73\x35\xc\x3\xf8\x43\x78\x52\x71\x15\x1f\x71\x5d\x6e\xed\xb9\xcc\x86\x30\xdb\x2b\xd3\x82\x88\x23\x71\x90\x53\x5c\xa9\x2f\x76\x1\xb7\x9a\xfe\x43\x55\xa3\x4\x9b\xe\xe4\x59\xdf\xc9\xe9\xb1\xea\x29\x28\x3c\x5c\xae\x72\x84\xb6\xc6\xeb\xc\x27\x7\x74\x90\xd\x31\xb0\x0\x77\xe9\x40\x70\x6f\x68\xa7\xfd\x6\xec\x4b\xc0\xb7\xac\xbc\x33\xb7\x6d\xa\xbd\x12\x1b\x59\xcb\xdd\x32\xf5\x1d\x94\x57\x76\x9e\xc\x18\x98\x71\xd7\x2a\xdb\xb\x7b\xa7\x71\xb7\x67\x81\x23\x96\xae\xb9\x7e\x32\x43\x92\x8a\x19\xa0\xc4\xd4\x3b\x57\xf9\x4a\x2c\xfb\x51\x46\xbb\xcb\x5d\xb3\xef\x13\x93\x6e\x68\x42\x54\x57\xd3\x6a\x3a\x8f\x9d\x66\xbf\xbd\x36\x23\xf5\x93\x83\x7b\x9c\xc0\xdd\xc5\x49\xc0\x64\xed\x7\x12\xb3\xe6\xe4\xe5\x38\x95\x23\xb1\xa0\x3b\x1a\x61\xda\x17\xac\xc3\x58\xdd\x74\x64\x22\x11\xe8\x32\x1d\x16\x93\x85\x99\xa5\x9c\x34\x55\xb1\xe9\x20\x72\xc9\x28\x7b\x79\x0\xa1\xa6\xa3\x27\x40\x18\x8a\x54\xe0\xcc\xe8\x4e\x8e\x43\x96\xe7\x3f\xc8\xe9\xb2\xf9\xc9\xda\x4\x71\x50\x47\xe4\xaa\xce\xa2\x30\xc8\xe4\xac\xc7\xd\x6\x2e\xe6\xe8\x80\x36\x29\x9e\x1\xb8\xc3\xf0\xa0\x5d\x7a\xca\x4d\xa0\x57\xbd\x2a\x45\xa7\x7f\x9c\x93\x7\x8f\x35\x67\x92\xe3\xe9\x7f\xa8\x61\x43\x9e\x25\x4f\x33\x76\x13\x6e\x12\xb9\xdd\xa4\x7c\x8\x9f\x7c\xe7\xa\x8d\x84\x6\xa4\x33\x17\x34\x5e\x10\x7c\xc0\xa8\x3d\x1f\x42\x20\x51\x65\x5d\x9\xc3\xaa\xc0\xc8\xd\xf0\x79\xbc\x20\x1b\x95\xe7\x6\x7d\x47\x20\x3\x1a\x74\xdd\xe2\xd4\xae\x38\x71\x9b\xf5\x80\xec\x8\x4e\x56\xba\x76\x12\x1a\xdf\x48\xf3\xae\xb3\xe6\xe6\xbe\xc0\x91\x2e\x1\xb3\x1\x86\xa2\xb9\x52\xd1\x21\xae\xd4\x97\x1d\xef\x41\x12\x95\x3d\x48\x45\x1c\x56\x32\x8f\xb8\x43\xbb\x19\xf3\xca\xe9\xeb\x6d\x84\xbe\x86\x6\xe2\x36\xb2\x62\x9d\xd3\x4c\x48\x18\x54\x13\x4e\xcf\xfd\xba\x84\xb9\x30\x53\xcf\xfb\xb9\x29\x8f\xdc\x9f\xef\x60\xb\x64\xf6\x8b\xee\xa6\x91\xc2\x41\x6c\xf6\xfa\x79\x67\x4b\xc1\x3f\xaf\x9\x81\xd4\x5d\xcb\x9\xdf\x36\x31\xc0\x14\x3c\x7c\xe\x65\x95\x99\x6d\xa3\xf4\xd7\x38\xee\x1a\x2b\x37\xe2\xa4\x3b\x4b\xd0\x65\xca\xf8\xc3\xe8\x15\x20\xef\xf2\x0\xfd\x1\x9\xc5\xc8\x17\x4\x93\xd0\x93\x3\x55\xc5\xfe\x32\xa3\x3e\x28\x2d\x3b\x93\x8a\xcc\x7\x72\x80\x8b\x74\x16\x24\xbb\xda\x94\x39\x30\x8f\xb1\xcd\x4a\x90\x92\x7c\x14\x8f\x95\x4e\xac\x9b\xd8\x8f\x1a\x87\xa4\x32\x27\x8a\xba\xf7\x41\xcf\x84\x37\x19\xe6\x6\xf5\xe\xcf\x36\xf5\x9e\x6c\xde\xbc\xff\x64\x7e\x4e\x59\x57\x48\xfe\x14\xf7\x9c\x93\x5d\x15\xad\xcc\x11\xb1\x17\x18\xb2\x7e\xcc\xab\xe9\xce\x7d\x77\x5b\x51\x1b\x1e\x20\xa8\x32\x6\xe\x75\x93\xac\xdb\x35\x37\x1f\xe9\x19\x1d\xb4\x71\x97\xd6\x4e\x2c\x8\xa5\x13\xf9\xe\x7e\x78\x6e\x14\xe0\xa9\xb9\x96\x4c\x80\x82\xba\x17\xb3\x9d\x69\xb0\x84\x46\xff\xf9\x52\x79\x94\x58\x3a\x62\x90\x15\x35\x71\x10\x37\xed\xa1\x8e\x53\x6e\xf4\x26\x57\x93\x15\x93\xf6\x81\x2c\x5a\x10\xda\x92\xad\x2f\xdb\x28\x31\x2d\x55\x4\xd2\x6\x28\x8c\x1e\xdc\xea\x54\xac\xff\xb7\x6c\x30\x15\xd4\xb4\xd\x0\x93\x57\xdd\xd2\x7\x7\x6\xd9\x43\x9b\xcd\x3a\xf4\x7d\x4c\x36\x5d\x23\xa2\xcc\x57\x40\x91\xe9\x2c\x2f\x2c\xd5\x30\x9b\x17\xb0\xc9\xf7\xa7\x2f\xd1\x93\x20\x6b\xc6\xc1\xe4\x6f\xcb\xd1\xe7\x9\xf\x9e\xdc\xaa\x9f\x2f\xdf\x56\x9f\xd4\x33\x4\xaf\xd3\x6c\x58\x61\xf0\x30\xec\xf2\x7f\xf2\x9c\xdf\x39\xbb\x6f\xa2\x8c\x7e\xc4\x22\x51\x71\xc0\x4d\x14\x1a\xc4\xcd\x4\xd9\x87\x8\x50\x5\xcc\xaf\xf6\xf0\x8f\x92\x54\x58\xc2\xc7\x9\x7a\x59\x2\x5\xe8\xb0\x86\xd9\xbf\x7b\x35\x51\x4d\xaf\x8\x97\x2c\x65\xda\x2a\x71\x3a\xa8\x51\xcc\xf2\x73\x27\xc3\xfd\x62\xcf\xe3\xb2\xca\xcb\xbe\x1a\xa\xa1\x34\x7b\x77\xc4\x62\x68\x78\x5f\x94\x7\x4\x65\x16\x4b\x61\xcb\xff\x75\x26\x50\x66\x1f\x6e\x93\xf8\xc5\x51\xeb\xa4\x4a\x48\x68\x6b\xe2\x5e\x44\xb2\x50\x2c\x6c\xae\x79\x4e\x66\x35\x81\x50\xac\xbc\x3f\xb1\xc\xf3\x5\x3c\x4a\xa3\x6c\x2a\x79\xb4\xb7\xab\xca\xc7\x9b\x8e\xcd\x5f\x11\x3\xcb\x30\xa3\xab\xda\xfe\x64\xb9\xbb\xd8\x5e\x3a\x1a\x56\xe5\x5\x48\x90\x1e\x61\x69\x1b\x22\xe6\x1a\x3c\x75\xad\x1f\x37\x28\xdc\xe4\x6d\xbd\x42\xdc\xd3\xc8\xb6\x1c\x48\xfe\x94\x77\x7f\xbd\x62\xac\xa3\x47\x27\xcf\x5f\xd9\xdb\xaf\xec\xf7\x5e\xc1\xb0\x9d\x1\x26\x99\x7e\x8f\x3\x70\xb5\x42\xbe\x67\x28\x1b\x7c\xbd\x61\x21\x97\xcc\x5c\xe1\x97\x8f\x8d\xde\x2b\xaa\xa7\x71\x1d\x1e\x2\x73\x70\x58\x32\x5b\x1d\x67\x3d\xe0\x74\x4f\x3\xf2\x70\x51\x79\xf1\x61\x70\x15\x74\x9d\x23\x89\xde\xac\xfd\xde\xd0\x1f\xc3\x87\x44\x35\x4b\xe5\xb0\x60\xc5\x22\xe4\x9e\xca\xeb\xd5\x3a\x9\x45\xa4\xdb\xfa\x3f\xeb\x1b\xc7\xc8\x14\x99\x51\x92\x10\xed\xed\x28\xe0\xa1\xf8\x26\xcf\xcd\xcb\x63\xa1\x3b\xe3\xdf\x7e\xfe\xa6\xf0\x81\x9a\xbf\x55\xde\x54\xd5\x56\x60\x98\x10\x68\xf4\x38\x96\x8e\x6f\x1d\x44\x7f\xd6\x2f\xfe\x55\xfb\xc\x7e\x67\xe2\x61\x44\xed\xf2\x35\x30\x5d\xe9\xc7\xd6\x6d\xe0\xa0\xed\xf3\xfc\xd8\x3e\xa\x7b\xcd\xaf\x65\x68\x18\xc0\xec\x4\x1c\x74\x6d\xe2\x6e\x79\xd4\x11\x2b\x62\xd5\x27\xad\x4f\x1\x59\x73\xcc\x6a\x53\xfb\x2d\xd5\x4e\x99\x21\x65\x4d\xf5\x82\xf7\xd8\x42\xce\x6f\x3d\x36\x47\xf1\x5\x16\xe8\x1b\x6a\x8f\x93\xf2\x8f\x37\x40\x12\x28\xa3\xe6\xb9\x17\x4a\x1f\xb1\xd1\x66\x69\x86\xc4\xfc\x97\xae\x3f\x8f\x1e\x2b\xdf\xcd\xf9\x3c",
+	.ilen	= 1949,
+	.result	= "\x7a\x57\xf2\xc7\x6\x3f\x50\x7b\x36\x1a\x66\x5c\xb9\xe\x5e\x3b\x45\x60\xbe\x9a\x31\x9f\xff\x5d\x66\x34\xb4\xdc\xfb\x9d\x8e\xee\x6a\x33\xa4\x7\x3c\xf9\x4c\x30\xa1\x24\x52\xf9\x50\x46\x88\x20\x2\x32\x3a\xe\x99\x63\xaf\x1f\x15\x28\x2a\x5\xff\x57\x59\x5e\x18\xa1\x1f\xd0\x92\x5c\x88\x66\x1b\x0\x64\xa5\x93\x8d\x6\x46\xb0\x64\x8b\x8b\xef\x99\x5\x35\x85\xb3\xf3\x33\xbb\xec\x66\xb6\x3d\x57\x42\xe3\xb4\xc6\xaa\xb0\x41\x2a\xb9\x59\xa9\xf6\x3e\x15\x26\x12\x3\x21\x4c\x74\x43\x13\x2a\x3\x27\x9\xb4\xfb\xe7\xb7\x40\xff\x5e\xce\x48\x9a\x60\xe3\x8b\x80\x8c\x38\x2d\xcb\x93\x37\x74\x5\x52\x6f\x73\x3e\xc3\xbc\xca\x72\xa\xeb\xf1\x3b\xa0\x95\xdc\x8a\xc4\xa9\xdc\xca\x44\xd8\x8\x63\x6a\x36\xd3\x3c\xb8\xac\x46\x7d\xfd\xaa\xeb\x3e\xf\x45\x8f\x49\xda\x2b\xf2\x12\xbd\xaf\x67\x8a\x63\x48\x4b\x55\x5f\x6d\x8c\xb9\x76\x34\x84\xae\xc2\xfc\x52\x64\x82\xf7\xb0\x6\xf0\x45\x73\x12\x50\x30\x72\xea\x78\x9a\xa8\xaf\xb5\xe3\xbb\x77\x52\xec\x59\x84\xbf\x6b\x8f\xce\x86\x5e\x1f\x23\xe9\xfb\x8\x86\xf7\x10\xb9\xf2\x44\x96\x44\x63\xa9\xa8\x78\x0\x23\xd6\xc7\xe7\x6e\x66\x4f\xcc\xee\x15\xb3\xbd\x1d\xa0\xe5\x9c\x1b\x24\x2c\x4d\x3c\x62\x35\x9c\x88\x59\x9\xdd\x82\x1b\xcf\xa\x83\x6b\x3f\xae\x3\xc4\xb4\xdd\x7e\x5b\x28\x76\x25\x96\xd9\xc9\x9d\x5f\x86\xfa\xf6\xd7\xd2\xe6\x76\x1d\xf\xa1\xdc\x74\x5\x1b\x1d\xe0\xcd\x16\xb0\xa8\x8a\x34\x7b\x15\x11\x77\xe5\x7b\x7e\x20\xf7\xda\x38\xda\xce\x70\xe9\xf5\x6c\xd9\xbe\xc\x4c\x95\x4c\xc2\x9b\x34\x55\x55\xe1\xf3\x46\x8e\x48\x74\x14\x4f\x9d\xc9\xf5\xe8\x1a\xf0\x11\x4a\xc1\x8d\xe0\x93\xa0\xbe\x9\x1c\x2b\x4e\xf\xb2\x87\x8b\x84\xfe\x92\x32\x14\xd7\x93\xdf\xe7\x44\xbc\xc5\xae\x53\x69\xd8\xb3\x79\x37\x80\xe3\x17\x5c\xec\x53\x0\x9a\xe3\x8e\xdc\x38\xb8\x66\xf0\xd3\xad\x1d\x2\x96\x86\x3e\x9d\x3b\x5d\xa5\x7f\x21\x10\xf1\x1f\x13\x20\xf9\x57\x87\x20\xf5\x5f\xf1\x17\x48\xa\x51\x5a\xcd\x19\x3\xa6\x5a\xd1\x12\x97\xe9\x48\xe2\x1d\x83\x75\x50\xd9\x75\x7d\x6a\x82\xa1\xf9\x4e\x54\x87\x89\xc9\xc\xb7\x5b\x6a\x91\xc1\x9c\xb2\xa9\xdc\x9a\xa4\x49\xa\x6d\xd\xbb\xde\x86\x44\xdd\x5d\x89\x2b\x96\xf\x23\x95\xad\xcc\xa2\xb3\xb9\x7e\x74\x38\xba\x9f\x73\xae\x5f\xf8\x68\xa2\xe0\xa9\xce\xbd\x40\xd4\x4c\x6b\xd2\x56\x62\xb0\xcc\x63\x7e\x5b\xd3\xae\xd1\x75\xce\xbb\xb4\x5b\xa8\xf8\xb4\xac\x71\x75\xaa\xc9\x9f\xbb\x6c\xad\xf\x55\x5d\xe8\x85\x7d\xf9\x21\x35\xea\x92\x85\x2b\x0\xec\x84\x90\xa\x63\x96\xe4\x6b\xa9\x77\xb8\x91\xf8\x46\x15\x72\x63\x70\x1\x40\xa3\xa5\x76\x62\x2b\xbf\xf1\xe5\x8d\x9f\xa3\xfa\x9b\x3\xbe\xfe\x65\x6f\xa2\x29\xd\x54\xb4\x71\xce\xa9\xd6\x3d\x88\xf9\xaf\x6b\xa8\x9e\xf4\x16\x96\x36\xb9\x0\xdc\x10\xab\xb5\x8\x31\x1f\x0\xb1\x3c\xd9\x38\x3e\xc6\x4\xa7\x4e\xe8\xae\xed\x98\xc2\xf7\xb9\x0\x5f\x8c\x60\xd1\xe5\x15\xf7\xae\x1e\x84\x88\xd1\xf6\xbc\x3a\x89\x35\x22\x83\x7c\xca\xf0\x33\x82\x4c\x79\x3c\xfd\xb1\xae\x52\x62\x55\xd2\x41\x60\xc6\xbb\xfa\xe\x59\xd6\xa8\xfe\x5d\xed\x47\x3d\xe0\xea\x1f\x6e\x43\x51\xec\x10\x52\x56\x77\x42\x6b\x52\x87\xd8\xec\xe0\xaa\x76\xa5\x84\x2a\x22\x24\xfd\x92\x40\x88\xd5\x85\x1c\x1f\x6b\x47\xa0\xc4\xe4\xef\xf4\xea\xd7\x59\xac\x2a\x9e\x8c\xfa\x1f\x42\x8\xfe\x4f\x74\xa0\x26\xf5\xb3\x84\xf6\x58\x5f\x26\x66\x3e\xd7\xe4\x22\x91\x13\xc8\xac\x25\x96\x23\xd8\x9\xea\x45\x75\x23\xb8\x5f\xc2\x90\x8b\x9\xc4\xfc\x47\x6c\x6d\xa\xef\x69\xa4\x38\x19\xcf\x7d\xf9\x9\x73\x9b\x60\x5a\xf7\x37\xb5\xfe\x9f\xe3\x2b\x4c\xd\x6e\x19\xf1\xd6\xc0\x70\xf3\x9d\x22\x3c\xf9\x49\xce\x30\x8e\x44\xb5\x76\x15\x8f\x52\xfd\xa5\x4\xb8\x55\x6a\x36\x59\x7c\xc4\x48\xb8\xd7\xab\x5\x66\xe9\x5e\x21\x6f\x6b\x36\x29\xbb\xe9\xe3\xa2\x9a\xa8\xcd\x55\x25\x11\xba\x5a\x58\xa0\xde\xae\x19\x2a\x48\x5a\xff\x36\xcd\x6d\x16\x7a\x73\x38\x46\xe5\x47\x59\xc8\xa2\xf6\xe2\x6c\x83\xc5\x36\x2c\x83\x7d\xb4\x1\x5\x69\xe7\xaf\x5c\xc4\x64\x82\x12\x21\xef\xf7\xd1\x7d\xb8\x8d\x8c\x98\x7c\x5f\x7d\x92\x88\xb9\x94\x7\x9c\xd8\xe9\x9c\x17\x38\xe3\x57\x6c\xe0\xdc\xa5\x92\x42\xb3\xbd\x50\xa2\x7e\xb5\xb1\x52\x72\x3\x97\xd8\xaa\x9a\x1e\x75\x41\x11\xa3\x4f\xcc\xd4\xe3\x73\xad\x96\xdc\x47\x41\x9f\xb0\xbe\x79\x91\xf5\xb6\x18\xfe\xc2\x83\x18\x7d\x73\xd9\x4f\x83\x84\x3\xb3\xf0\x77\x66\x3d\x83\x63\x2e\x2c\xf9\xdd\xa6\x1f\x89\x82\xb8\x23\x42\xeb\xe2\xca\x70\x82\x61\x41\xa\x6d\x5f\x75\xc5\xe2\xc4\x91\x18\x44\x22\xfa\x34\x10\xf5\x20\xdc\xb7\xdd\x2a\x20\x77\xf5\xf9\xce\xdb\xa0\xa\x52\x2a\x4e\xdd\xcc\x97\xdf\x5\xe4\x5e\xb7\xaa\xf0\xe2\x80\xff\xba\x1a\xf\xac\xdf\x2\x32\xe6\xf7\xc7\x17\x13\xb7\xfc\x98\x48\x8c\xd\x82\xc9\x80\x7a\xe2\xa\xc5\xb4\xde\x7c\x3c\x79\x81\xe\x28\x65\x79\x67\x82\x69\x44\x66\x9\xf7\x16\x1a\xf9\x7d\x80\xa1\x79\x14\xa9\xc8\x20\xfb\xa2\x46\xbe\x8\x35\x17\x58\xc1\x1a\xda\x2a\x6b\x2e\x1e\xe6\x27\x55\x7b\x19\xe2\xfb\x64\xfc\x5e\x15\x54\x3c\xe7\xc2\x11\x50\x30\xb8\x72\x3\xb\x1a\x9f\x86\x27\x11\x5c\x6\x2b\xbd\x75\x1a\xa\xda\x1\xfa\x5c\x4a\xc1\x80\x3a\x6e\x30\xc8\x2c\xeb\x56\xec\x89\xfa\x35\x7b\xb2\xf0\x97\x8\x86\x53\xbe\xbd\x40\x41\x38\x1c\xb4\x8b\x79\x2e\x18\x96\x94\xde\xe8\xca\xe5\x9f\x92\x9f\x15\x5d\x56\x60\x5c\x9\xf9\x16\xf4\x17\xf\xf6\x4c\xda\xe6\x67\x89\x9f\xca\x6c\xe7\x9b\x4\x62\xe\x26\xa6\x52\xbd\x29\xff\xc7\xa4\x96\xe6\x6a\x2\xa5\x2e\x7b\xfe\x97\x68\x3e\x2e\x5f\x3b\xf\x36\xd6\x98\x19\x59\x48\xd2\xc6\xe1\x55\x1a\x6e\xd6\xed\x2c\xba\xc3\x9e\x64\xc9\x95\x86\x35\x5e\x3e\x88\x69\x99\x4b\xee\xbe\x9a\x99\xb5\x6e\x58\xae\xdd\x22\xdb\xdd\x6b\xfc\xaf\x90\xa3\x3d\xa4\xc1\x15\x92\x18\x8d\xd2\x4b\x7b\x6\xd1\x37\xb5\xe2\x7c\x2c\xf0\x25\xe4\x94\x2a\xbd\xe3\x82\x70\x78\xa3\x82\x10\x5a\x90\xd7\xa4\xfa\xaf\x1a\x88\x59\xdc\x74\x12\xb4\x8e\xd7\x19\x46\xf4\x84\x69\x9f\xbb\x70\xa8\x4c\x52\x81\xa9\xff\x76\x1c\xae\xd8\x11\x3d\x7f\x7d\xc5\x12\x59\x28\x18\xc2\xa2\xb7\x1c\x88\xf8\xd6\x1b\xa6\x7d\x9e\xde\x29\xf8\xed\xff\xeb\x92\x24\x4f\x5\xaa\xd9\x49\xba\x87\x59\x51\xc9\x20\x5c\x9b\x74\xcf\x3\xd9\x2d\x34\xc7\x5b\xa5\x40\xb2\x99\xf5\xcb\xb4\xf6\xb7\x72\x4a\xd6\xbd\xb0\xf3\x93\xe0\x1b\xa8\x4\x1e\x35\xd4\x80\x20\xf4\x9c\x31\x6b\x45\xb9\x15\xb0\x5e\xdd\xa\x33\x9c\x83\xcd\x58\x89\x50\x56\xbb\x81\x0\x91\x32\xf3\x1b\x3e\xcf\x45\xe1\xf9\xe1\x2c\x26\x78\x93\x9a\x60\x46\xc9\xb5\x5e\x6a\x28\x92\x87\x3f\x63\x7b\xdb\xf7\xd0\x13\x9d\x32\x40\x5e\xcf\xfb\x79\x68\x47\x4c\xfd\x1\x17\xe6\x97\x93\x78\xbb\xa6\x27\xa3\xe8\x1a\xe8\x94\x55\x7d\x8\xe5\xdc\x66\xa3\x69\xc8\xca\xc5\xa1\x84\x55\xde\x8\x91\x16\x3a\xc\x86\xab\x27\x2b\x64\x34\x2\x6c\x76\x8b\xc6\xaf\xcc\xe1\xd6\x8c\x2a\x18\x3d\xa6\x1b\x37\x75\x45\x73\xc2\x75\xd7\x53\x78\x3a\xd6\xe8\x29\xd2\x4a\xa8\x1e\x82\xf6\xb6\x81\xde\x21\xed\x2b\x56\xbb\xf2\xd0\x57\xc1\x7c\xd2\x6a\xd2\x56\xf5\x13\x5f\x1c\x6a\xb\x74\xfb\xe9\xfe\x9e\xea\x95\xb2\x46\xab\xa\xfc\xfd\xf3\xbb\x4\x2b\x76\x1b\xa4\x74\xb0\xc1\x78\xc3\x69\xe2\xb0\x1\xe1\xde\x32\x4c\x8d\x1a\xb3\x38\x8\xd5\xfc\x1f\xdc\xe\x2c\x9c\xb1\xa1\x63\x17\x22\xf5\x6c\x93\x70\x74\x0\xf8\x39\x1\x94\xd1\x32\x23\x56\x5d\xa6\x2\x76\x76\x93\xce\x2f\x19\xe9\x17\x52\xae\x6e\x2c\x6d\x61\x7f\x3b\xaa\xe0\x52\x85\xc5\x65\xc1\xbb\x8e\x5b\x21\xd5\xc9\x78\x83\x7\x97\x4c\x62\x61\x41\xd4\xfc\xc9\x39\xe3\x9b\xd0\xcc\x75\xc4\x97\xe6\xdd\x2a\x5f\xa6\xe8\x59\x6c\x98\xb9\x2\xe2\xa2\xd6\x68\xee\x3b\x1d\xe3\x4d\x5b\x30\xef\x3\xf2\xeb\x18\x57\x36\xe8\xa1\xf4\x47\xfb\xcb\x8f\xcb\xc8\xf3\x4f\x74\x9d\x9d\xb1\x8d\x14\x44\xd9\x19\xb4\x54\x4f\x75\x19\x9\xa0\x75\xbc\x3b\x82\xc6\x3f\xb8\x83\x19\x6e\xd6\x37\xfe\x6e\x8a\x4e\xe0\x4a\xab\x7b\xc8\xb4\x1d\xf4\xed\x27\x3\x65\xa2\xa1\xae\x11\xe7\x98\x78\x48\x91\xd2\xd2\xd4\x23\x78\x50\xb1\x5b\x85\x10\x8d\xca\x5f\xf\x71\xae\x72\x9a\xf6\x25\x19\x60\x6\xf7\x10\x34\x18\xd\xc9\x9f\x7b\xc\x9b\x8f\x91\x1b\x9f\xcd\x10\xee\x75\xf9\x97\x66\xfc\x4d\x33\x6e\x28\x2b\x92\x85\x4f\xab\x43\x8d\x8f\x7d\x86\xa7\xc7\xd8\xd3\xb\x8b\x57\xb6\x1d\x95\xd\xe9\xbc\xd9\x3\xd9\x10\x19\xc3\x46\x63\x55\x87\x61\x79\x6c\x95\xe\x9c\xdd\xca\xc3\xf3\x64\xf0\x7d\x76\xb7\x53\x67\x2b\x1e\x44\x56\x81\xea\x8f\x5c\x42\x16\xb8\x28\xeb\x1b\x61\x10\x1e\xbf\xec\xa8"
+}, {
+	.key	= "\xb3\x35\x50\x3\x54\x2e\x40\x5e\x8f\x59\x8e\xc5\x90\xd5\x27\x2d\xba\x29\x2e\xcb\x1b\x70\x44\x1e\x65\x91\x6e\x2a\x79\x22\xda\x64",
+	.nonce	= "\x5\xa3\x93\xed\x30\xc5\xa2\x6",
+	.assoc	= "\xb1\x69\x83\x87\x30\xaa\x5d\xb8\x77\xe8\x21\xff\x6\x59\x35\xce\x75\xfe\x38\xef\xb8\x91\x43\x8c\xcf\x70\xdd\xa\x68\xbf\xd4\xbc\x16\x76\x99\x36\x1e\x58\x79\x5e\xd4\x29\xf7\x33\x93\x48\xdb\x5f\x1\xae\x9c\xb6\xe4\x88\x6d\x2b\x76\x75\xe0\xf3\x74\xe2\xc9",
+	.alen	= 63,
+	.input	= "\x52\x34\xb3\x65\x3b\xb7\xe5\xd3\xab\x49\x17\x60\xd2\x52\x56\xdf\xdf\x34\x56\x82\xe2\xbe\xe5\xe1\x28\xd1\x4e\x5f\x4f\x1\x7d\x3f\x99\x6b\x30\x6e\x1a\x7c\x4c\x8e\x62\x81\xae\x86\x3f\x6b\xd0\xb5\xa9\xcf\x50\xf1\x2\x12\xa0\xb\x24\xe9\xe6\x72\x89\x2c\x52\x1b\x34\x38\xf8\x75\x5f\xa0\x74\xe2\x99\xdd\xa6\x4b\x14\x50\x4e\xf1\xbe\xd6\x9e\xdb\xb2\x24\x27\x74\x12\x4a\x78\x78\x17\xa5\x58\x8e\x2f\xf9\xf4\x8d\xee\x3\x88\xae\xb8\x29\xa1\x2f\x4b\xee\x92\xbd\x87\xb3\xce\x34\x21\x57\x46\x4\x49\xc\x80\xf2\x1\x13\xa1\x55\xb3\xff\x44\x30\x3c\x1c\xd0\xef\xbc\x18\x74\x26\xad\x41\x5b\x5b\x3e\x9a\x7a\x46\x4f\x16\xd6\x74\x5a\xb7\x3a\x28\x31\xd8\xae\x26\xac\x50\x53\x86\xf2\x56\xd7\x3f\x29\xbc\x45\x68\x8e\xcb\x98\x64\xdd\xc9\xba\xb8\x4b\x7b\x82\xdd\x14\xa7\xcb\x71\x72\x0\x5c\xad\x7b\x6a\x89\xa4\x3d\xbf\xb5\x4b\x3e\x7c\x5a\xcf\xb8\xa1\xc5\x6e\xc8\xb6\x31\x57\x7b\xdf\xa5\x7e\xb1\xd6\x42\x2a\x31\x36\xd1\xd0\x3f\x7a\xe5\x94\xd6\x36\xa0\x6f\xb7\x40\x7d\x37\xc6\x55\x7c\x50\x40\x6d\x29\x89\xe3\x5a\xae\x97\xe7\x44\x49\x6e\xbd\x81\x3d\x3\x93\x6\x12\x6\xe2\x41\x12\x4a\xf1\x6a\xa4\x58\xa2\xfb\xd2\x15\xba\xc9\x79\xc9\xce\x5e\x13\xbb\xf1\x9\x4\xcc\xfd\xe8\x51\x34\x6a\xe8\x61\x88\xda\xed\x1\x47\x84\xf5\x73\x25\xf9\x1c\x42\x86\x7\xf3\x5b\x1a\x1\xb3\xeb\x24\x32\x8d\xf6\xed\x7c\x4b\xeb\x3c\x36\x42\x28\xdf\xdf\xb6\xbe\xd9\x8c\x52\xd3\x2b\x8\x90\x8c\xe7\x98\x31\xe2\x32\x8e\xfc\x11\x48\x0\xa8\x6a\x42\x4a\x2\xc6\x4b\x9\xf1\xe3\x49\xf3\x45\x1f\xe\xbc\x56\xe2\xe4\xdf\xfb\xeb\x61\xfa\x24\xc1\x63\x75\xbb\x47\x75\xaf\xe1\x53\x16\x96\x21\x85\x26\x11\xb3\x76\xe3\x23\xa1\x6b\x74\x37\xd0\xde\x6\x90\x71\x5d\x43\x88\x9b\x0\x54\xa6\x75\x2f\xa1\xc2\xb\x73\x20\x1d\xb6\x21\x79\x57\x3f\xfa\x9\xbe\x8a\x33\xc3\x52\xf0\x1d\x82\x31\xd1\x55\xb5\x6c\x99\x25\xcf\x5c\x32\xce\xe9\xd\xfa\x69\x2c\xd5\xd\xc5\x6d\x86\xd0\xc\x3b\x6\x50\x79\xe8\xc3\xae\x4\xe6\xcd\x51\xe4\x26\x9b\x4f\x7e\xa6\xf\xab\xd8\xe5\xde\xa9\x0\x95\xbe\xa3\x9d\x5d\xb2\x9\x70\x18\x1c\xf0\xac\x29\x23\x2\x29\x28\xd2\x74\x35\x57\x62\xf\x24\xea\x5e\x33\xc2\x92\xf3\x78\x4d\x30\x1e\xa1\x99\xa9\x82\xb0\x42\x31\x8d\xad\x8a\xbc\xfc\xd4\x57\x47\x3e\xb4\x50\xdd\x6e\x2c\x80\x4d\x22\xf1\xfb\x57\xc4\xdd\x17\xe1\x8a\x36\x4a\xb3\x37\xca\xc9\x4e\xab\xd5\x69\xc4\xf4\xbc\xb\x3b\x44\x4b\x29\x9c\xee\xd4\x35\x22\x21\xb0\x1f\x27\x64\xa8\x51\x1b\xf0\x9f\x19\x5c\xfb\x5a\x64\x74\x70\x45\x9\xf5\x64\xfe\x1a\x2d\xc9\x14\x4\x14\xcf\xd5\x7d\x60\xaf\x94\x39\x94\xe2\x7d\x79\x82\xd0\x65\x3b\x6b\x9c\x19\x84\xb4\x6d\xb3\xc\x99\xc0\x56\xa8\xbd\x73\xce\x5\x84\x3e\x30\xaa\xc4\x9b\x1b\x4\x2a\x9f\xd7\x43\x2b\x23\xdf\xbf\xaa\xd5\xc2\x43\x2d\x70\xab\xdc\x75\xad\xac\xf7\xc0\xbe\x67\xb2\x74\xed\x67\x10\x4a\x92\x60\xc1\x40\x50\x19\x8a\x8a\x8c\x9\xe\x72\xe1\x73\x5e\xe8\x41\x85\x63\x9f\x3f\xd7\x7d\xc4\xfb\x22\x5d\x92\x6c\xb3\x1e\xe2\x50\x2f\x82\xa8\x28\xc0\xb5\xd7\x5f\x68\xd\x2c\x2d\xaf\x7e\xfa\x2e\x8\xf\x1f\x70\x9f\xe9\x19\x72\x55\xf8\xfb\x51\xd2\x33\x5d\xa0\xd3\x2b\xa\x6c\xbc\x4e\xcf\x36\x4d\xdc\x3b\xe9\x3e\x81\x7c\x61\xdb\x20\x2d\x3a\xc3\xb3\xc\x1e\x0\xb9\x7c\xf5\xca\x10\x5f\x3a\x71\xb3\xe4\x20\xdb\xc\x2a\x98\x63\x45\x0\x58\xf6\x68\xe4\xb\xda\x13\x3b\x60\x5c\x76\xdb\xb9\x97\x71\xe4\xd9\xb7\xdb\xbd\x68\xc7\x84\x84\xaa\x7c\x68\x62\x5e\x16\xfc\xba\x72\xaa\x9a\xa9\xeb\x7c\x75\x47\x97\x7e\xad\xe2\xd9\x91\xe8\xe4\xa5\x31\xd7\x1\x8e\xa2\x11\x88\x95\xb9\xf2\x9b\xd3\x7f\x1b\x81\x22\xf7\x98\x60\xa\x64\xa6\xc1\xf6\x49\xc7\xe3\x7\x4d\x94\x7a\xcf\x6e\x68\xc\x1b\x3f\x6e\x2e\xee\x92\xfa\x52\xb3\x59\xf8\xf1\x8f\x6a\x66\xa3\x82\x76\x4a\x7\x1a\xc7\xdd\xf5\xda\x9c\x3c\x24\xbf\xfd\x42\xa1\x10\x64\x6a\xf\x89\xee\x36\xa5\xce\x99\x48\x6a\xf0\x9f\x9e\x69\xa4\x40\x20\xe9\x16\x15\xf7\xdb\x75\x2\xcb\xe9\x73\x8b\x3b\x49\x2f\xf0\xaf\x51\x6\x5c\xdf\x27\x27\x49\x6a\xd1\xcc\xc7\xb5\x63\xb5\xfc\xb8\x5c\x87\x7f\x84\xb4\xcc\x14\xa9\x53\xda\xa4\x56\xf8\xb6\x1b\xcc\x40\x27\x52\x6\x5a\x13\x81\xd7\x3a\xd4\x3b\xfb\x49\x65\x31\x33\xb2\xfa\xcd\xad\x58\x4e\x2b\xae\xd2\x20\xfb\x1a\x48\xb4\x3f\x9a\xd8\x7a\x35\x4a\xc8\xee\x88\x5e\x7\x66\x54\xb9\xec\x9f\xa3\xe3\xb9\x37\xaa\x49\x76\x31\xda\x74\x2d\x3c\xa4\x65\x10\x32\x38\xf0\xde\xd3\x99\x17\xaa\x71\xaa\x8f\xf\x8c\xaf\xa2\xf8\x5d\x64\xba\x1d\xa3\xef\x96\x73\xe8\xa1\x2\x8d\xc\x6d\xb8\x6\x90\xb8\x8\x56\x2c\xa7\x6\xc9\xc2\x38\xdb\x7c\x63\xb1\x57\x8e\xea\x7c\x79\xf3\x49\x1d\xfe\x9f\xf3\x6e\xb1\x1d\xba\x19\x80\x1a\xa\xd3\xb0\x26\x21\x40\xb1\x7c\xf9\x4d\x8d\x10\xc1\x7e\xf4\xf6\x3c\xa8\xfd\x7c\xa3\x92\xb2\xf\xaa\xcc\xa6\x11\xfe\x4\xe3\xd1\x7a\x32\x89\xdf\xd\xc4\x8f\x79\x6b\xca\x16\x7c\x6e\xf9\xad\xf\xf6\xfe\x27\xdb\xc4\x13\x70\xf1\x62\x1a\x4f\x79\x40\xc9\x9b\x8b\x21\xea\x84\xfa\xf5\xf1\x89\xce\xb7\x55\xa\x80\x39\x2f\x55\x36\x16\x9c\x7b\x8\xbd\x87\xd\xa5\x32\xf1\x52\x7c\xe8\x55\x60\x5b\xd7\x69\xe4\xfc\xfa\x12\x85\x96\xea\x50\x28\xab\x8a\xf7\xbb\xe\x53\x74\xca\xa6\x27\x9\xc2\xb5\xde\x18\x14\xd9\xea\xe5\x29\x1c\x40\x56\xcf\xd7\xae\x5\x3f\x65\xaf\x5\x73\xe2\x35\x96\x27\x7\x14\xc0\xad\x33\xf1\xdc\x44\x7a\x89\x17\x77\xd2\x9c\x58\x60\xf0\x3f\x7b\x2d\x2e\x57\x95\x54\x87\xed\xf2\xc7\x4c\xf0\xae\x56\x29\x19\x7d\x66\x4b\x9b\x83\x84\x42\x3b\x1\x25\x66\x8e\x2\xde\xb9\x83\x54\x19\xf6\x9f\x79\xd\x67\xc5\x1d\x7a\x44\x2\x98\xa7\x16\x1c\x29\xd\x74\xff\x85\x40\x6\xef\x2c\xa9\xc6\xf5\x53\x7\x6\xae\xe4\xfa\x5f\xd8\x39\x4d\xf1\x9b\x6b\xd9\x24\x84\xfe\x3\x4c\xb2\x3f\xdf\xa1\x5\x9e\x50\x14\x5a\xd9\x1a\xa2\xa7\xfa\xfa\x17\xf7\x78\xd6\xb5\x92\x61\x91\xac\x36\xfa\x56\xd\x38\x32\x18\x85\x8\x58\x37\xf0\x4b\xdb\x59\xe7\xa4\x34\xc0\x1b\x1\xaf\x2d\xde\xa1\xaa\x5d\xd3\xec\xe1\xd4\xf7\xe6\x54\x68\xf0\x51\x97\xa7\x89\xea\x24\xad\xd3\x6e\x47\x93\x8b\x4b\xb4\xf7\x1c\x42\x6\x67\xe8\x99\xf6\xf5\x7b\x85\xb5\x65\xb5\xb5\xd2\x37\xf5\xf3\x2\xa6\x4d\x11\xa7\xdc\x51\x9\x7f\xa0\xd8\x88\x1c\x13\x71\xae\x9c\xb7\x7b\x34\xd6\x4e\x68\x26\x83\x51\xaf\x1d\xee\x8b\xbb\x69\x43\x2b\x9e\x8a\xbc\x2\xe\xa0\x1b\xe0\xa8\x5f\x6f\xaf\x1b\x8f\xe7\x64\x71\x74\x11\x7e\xa8\xd8\xf9\x97\x6\xc3\xb6\xfb\xfb\xb7\x3d\x35\x9d\x3b\x52\xed\x54\xca\xf4\x81\x1\x2d\x1b\xc3\xa7\x0\x3d\x1a\x39\x54\xe1\xf6\xff\xed\x6f\xb\x5a\x68\xda\x58\xdd\xa9\xcf\x5c\x4a\xe5\x9\x4e\xde\x9d\xbc\x3e\xee\x5a\x0\x3b\x2c\x87\x10\x65\x60\xdd\xd7\x56\xd1\x4c\x64\x45\xe4\x21\xec\x78\xf8\x25\x7a\x3e\x16\x5d\x9\x53\x14\xbe\x4f\xae\x87\xd8\xd1\xaa\x3c\xf6\x3e\xa4\x70\x8c\x5e\x70\xa4\xb3\x6b\x66\x73\xd3\xbf\x31\x6\x19\x62\x93\x15\xf2\x86\xe4\x52\x7e\x53\x4c\x12\x38\xcc\x34\x7d\x57\xf6\x42\x93\x8a\xc4\xee\x5c\x8a\xe1\x52\x8f\x56\x64\xf6\xa6\xd1\x91\x57\x70\xcd\x11\x76\xf5\x59\x60\x60\x3c\xc1\xc3\xb\x7f\x58\x1a\x50\x91\xf1\x68\x8f\x6e\x74\x74\xa8\x51\xb\xf7\x7a\x98\x37\xf2\xa\xe\xa4\x97\x4\xb8\x9b\xfd\xa0\xea\xf7\xd\xe1\xdb\x3\xf0\x31\x29\xf8\xdd\x6b\x8b\x5d\xd8\x59\xa9\x29\xcf\x9a\x79\x89\x19\x63\x46\x9\x79\x6a\x11\xda\x63\x68\x48\x77\x23\xfb\x7d\x3a\x43\xcb\x2\x3b\x7a\x6d\x10\x2a\x9e\xac\xf1\xd4\x19\xf8\x23\x64\x1d\x2c\x5f\xf2\xb0\x5c\x23\x27\xf7\x27\x30\x16\x37\xb1\x90\xab\x38\xfb\x55\xcd\x78\x58\xd4\x7d\x43\xf6\x45\x5e\x55\x8d\xb1\x2\x65\x58\xb4\x13\x4b\x36\xf7\xcc\xfe\x3d\xb\x82\xe2\x12\x11\xbb\xe6\xb8\x3a\x48\x71\xc7\x50\x6\x16\x3a\xe6\x7c\x5\xc7\xc8\x4d\x2f\x8\x6a\x17\x9a\x95\x97\x50\x68\xdc\x28\x18\xc4\x61\x38\xb9\xe0\x3e\x78\xdb\x29\xe0\x9f\x52\xdd\xf8\x4f\x91\xc1\xd0\x33\xa1\x7a\x8e\x30\x13\x82\x7\x9f\xd3\x31\xf\x23\xbe\x32\x5a\x75\xcf\x96\xb2\xec\xb5\x32\xac\x21\xd1\x82\x33\xd3\x15\x74\xbd\x90\xf1\x2c\xe6\x5f\x8d\xe3\x2\xe8\xe9\xc4\xca\x96\xeb\xe\xbc\x91\xf4\xb9\xea\xd9\x1b\x75\xbd\xe1\xac\x2a\x5\x37\x52\x9b\x1b\x3f\x5a\xdc\x21\xc3\x98\xbb\xaf\xa3\xf2\x0\xbf\xd\x30\x89\x5\xcc\xa5\x76\xf5\x6\xf0\xc6\x54\x8a\x5d\xd4\x1e\xc1\xf2\xce\xb0\x62\xc8\xfc\x59\x42\x9a\x90\x60\x55\xfe\x88\xa5\x8b\xb8\x33\xc\x23\x24\xd\x15\x70\x37\x1e\x3d\xf6\xd2\xea\x92\x10\xb2\xc4\x51\xac\xf2\xac\xf3\x6b\x6c\xaa\xcf\x12\xc5\x6c\x90\x50\xb5\xc\xfc\x1a\x15\x52\xe9\x26\xc6\x52\xa4\xe7\x81\x69\xe1\xe7\x9e\x30\x1\xec\x84\x89\xb2\xd\x66\xdd\xce\x28\x5c\xec\x98\x46\x68\x21\x9f\x88\x3f\x1f\x42\x77\xce\xd0\x61\xd4\x20\xa7\xff\x53\xad\x37\xd0\x17\x35\xc9\xfc\xba\xa\x78\x3f\xf2\xcc\x86\x89\xe8\x4b\x3c\x48\x33\x9\x7f\xc6\xc0\xdd\xb8\xfd\x7a\x66\x66\x65\xeb\x47\xa7\x4\x28\xa3\x19\x8e\xa9\xb1\x13\x67\x62\x70\xcf\xd6",
+	.ilen	= 2027,
+	.result	= "\x74\xa6\x3e\xe4\xb1\xcb\xaf\xb0\x40\xe5\xf\x9e\xf1\xf2\x89\xb5\x42\x34\x8a\xa1\x3\xb7\xe9\x57\x46\xbe\x20\xe4\x6e\xb0\xeb\xff\xea\x7\x7e\xef\xe2\x55\x9f\xe5\x78\x3a\xb7\x83\xc2\x18\x40\x7b\xeb\xcd\x81\xfb\x90\x12\x9e\x46\xa9\xd6\x4a\xba\xb0\x62\xdb\x6b\x99\xc4\xdb\x54\x4b\xb8\xa5\x71\xcb\xcd\x63\x32\x55\xfb\x31\xf0\x38\xf5\xbe\x78\xe4\x45\xce\x1b\x6a\x5b\xe\xf4\x16\xe4\xb1\x3d\xf6\x63\x7b\xa7\xc\xde\x6f\x8f\x74\xdf\xe0\x1e\x9d\xce\x8f\x24\xef\x23\x35\x33\x7b\x83\x34\x23\x58\x74\x14\x77\x1f\xc2\x4f\x4e\xc6\x89\xf9\x52\x9\x37\x64\x14\xc4\x1\x6b\x9d\x77\xe8\x90\x5d\xa8\x4a\x2a\xef\x5c\x7f\xeb\xbb\xb2\xc6\x93\x99\x66\xdc\x7f\xd4\x9e\x2a\xca\x8d\xdb\xe7\x20\xcf\xe4\x73\xae\x49\x7d\x64\xf\xe\x28\x46\xa9\xa8\x32\xe4\xe\xf6\x51\x53\xb8\x3c\xb1\xff\xa3\x33\x41\x75\xff\xf1\x6f\xf1\xfb\xbb\x83\x7f\x6\x9b\xe7\x1b\xa\xe0\x5c\x33\x60\x5b\xdb\x5b\xed\xfe\xa5\x16\x19\x72\xa3\x64\x23\x0\x2\xc7\xf3\x6a\x81\x3e\x44\x1d\x79\x15\x5f\x9a\xde\xe2\xfd\x1b\x73\xc1\xbc\x23\xba\x31\xd2\x50\xd5\xad\x7f\x74\xa7\xc9\xf8\x3e\x2b\x26\x10\xf6\x3\x36\x74\xe4\xe\x6a\x72\xb7\x73\xa\x42\x28\xc2\xad\x5e\x3\xbe\xb8\xb\xa8\x5b\xd4\xb8\xba\x52\x89\xb1\x9b\xc1\xc3\x65\x87\xed\xa5\xf4\x86\xfd\x41\x80\x91\x27\x59\x53\x67\x15\x78\x54\x8b\x2d\x3d\xc7\xff\x2\x92\x7\x5f\x7a\x4b\x60\x59\x3c\x6f\x5c\xd8\xec\x95\xd2\xfe\xa0\x3b\xd8\x3f\xd1\x69\xa6\xd6\x41\xb2\xf4\x4d\x12\xf4\x58\x3e\x66\x64\x80\x31\x9b\xa8\x4c\x8b\x7\xb2\xec\x66\x94\x66\x47\x50\x50\x5f\x18\xb\xe\xd6\xc0\x39\x21\x13\x9e\x33\xbc\x79\x36\x2\x96\x70\xf0\x48\x67\x2f\x26\xe9\x6d\x10\xbb\xd6\x3f\xd1\x64\x7a\x2e\xbe\xc\x61\xf0\x75\x42\x38\x23\xb1\x9e\x9f\x7c\x67\x66\xd9\x58\x9a\xf1\xbb\x41\x2a\x8d\x65\x84\x94\xfc\xdc\x6a\x50\x64\xdb\x56\x33\x76\x0\x10\xed\xbe\xd2\x12\xf6\xf6\x1b\xa2\x16\xde\xae\x31\x95\xdd\xb1\x8\x7e\x4e\xee\xe7\xf9\xa5\xfb\x5b\x61\x43\x0\x40\xf6\x7e\x2\x4\x32\x4e\xc\xe2\x66\xd\xd7\x7\x98\xe\xf8\x72\x34\x6d\x95\x86\xd7\xcb\x31\x54\x47\xd0\x38\x29\x9c\x5a\x68\xd4\x87\x76\xc9\xe7\x7e\xe3\xf4\x81\x6d\x18\xcb\xc9\x5\xaf\xa0\xfb\x66\xf7\xf1\x1c\xc6\x14\x11\x4f\x2b\x79\x42\x8b\xbc\xac\xe7\x6c\xfe\xf\x58\xe7\x7c\x78\x39\x30\xb0\x66\x2c\x9b\x6d\x3a\xe1\xcf\xc9\xa4\xe\x6d\x6d\x8a\xa1\x3a\xe7\x28\xd4\x78\x4c\xa6\xa2\x2a\xa6\x3\x30\xd7\xa8\x25\x66\x87\x2f\x69\x5c\x4e\xdd\xa5\x49\x5d\x37\x4a\x59\xc4\xaf\x1f\xa2\xe4\xf8\xa6\x12\x97\xd5\x79\xf5\xe2\x4a\x2b\x5f\x61\xe4\x9e\xe3\xee\xb8\xa7\x5b\x2f\xf4\x9e\x6c\xfb\xd1\xc6\x56\x77\xba\x75\xaa\x3d\x1a\xa8\xb\xb3\x68\x24\x0\x10\x7f\xfd\xd7\xa1\x8d\x83\x54\x4f\x1f\xd8\x2a\xbe\x8a\xc\x87\xab\xa2\xde\xc3\x39\xbf\x9\x3\xa5\xf3\x5\x28\xe1\xe1\xee\x39\x70\x9c\xd8\x81\x12\x1e\x2\x40\xd2\x6e\xf0\xeb\x1b\x3d\x22\xc6\xe5\xe3\xb4\x5a\x98\xbb\xf0\x22\x28\x8d\xe5\xd3\x16\x48\x24\xa5\xe6\x66\xc\xf9\x8\xf9\x7e\x1e\xe1\x28\x26\x22\xc7\xc7\xa\x32\x47\xfa\xa3\xbe\x3c\xc4\xc5\x53\xa\xd5\x94\x4a\xd7\x93\xd8\x42\x99\xb9\xa\xdb\x56\xf7\xb9\x1c\x53\x4f\xfa\xd3\x74\xad\xd9\x68\xf1\x1b\xdf\x61\xc6\x5e\xa8\x48\xfc\xd4\x4a\x4c\x3c\x32\xf7\x1c\x96\x21\x9b\xf9\xa3\xcc\x5a\xce\xd5\xd7\x8\x24\xf6\x1c\xfd\xdd\x38\xc2\x32\xe9\xb8\xe7\xb6\xfa\x9d\x45\x13\x2c\x83\xfd\x4a\x69\x82\xcd\xdc\xb3\x76\xc\x9e\xd8\xf4\x1b\x45\x15\xb4\x97\xe7\x58\x34\xe2\x3\x29\x5a\xbf\xb6\xe0\x5d\x13\xd9\x2b\xb4\x80\xb2\x45\x81\x6a\x2e\x6c\x89\x7d\xee\xbb\x52\xdd\x1f\x18\xe7\x13\x6b\x33\xe\xea\x36\x92\x77\x7b\x6d\x9c\x5a\x5f\x45\x7b\x7b\x35\x62\x23\xd1\xbf\xf\xd0\x8\x1b\x2b\x80\x6b\x7e\xf1\x21\x47\xb0\x57\xd1\x98\x72\x90\x34\x1c\x20\x4\xff\x3d\x5c\xee\xe\x57\x5f\x6f\x24\x4e\x3c\xea\xfc\xa5\xa9\x83\xc9\x61\xb4\x51\x24\xf8\x27\x5e\x46\x8c\xb1\x53\x2\x96\x35\xba\xb8\x4c\x71\xd3\x15\x59\x35\x22\x20\xad\x3\x9f\x66\x44\x3b\x9c\x35\x37\x1f\x9b\xbb\xf3\xdb\x35\x63\x30\x64\xaa\xa2\x6\xa8\x5d\xbb\xe1\x9f\x70\xec\x82\x11\x6\x36\xec\x8b\x69\x66\x24\x44\xc9\x4a\x57\xbb\x9b\x78\x13\xce\x9c\xc\xba\x92\x93\x63\xb8\xe2\x95\xf\xf\x16\x39\x52\xfd\x3a\x6d\x2\x4b\xdf\x13\xd3\x2a\x22\xb4\x3\x7c\x54\x49\x96\x68\x54\x10\xfa\xef\xaa\x6c\xe8\x22\xdc\x71\x16\x13\x1a\xf6\x28\xe5\x6d\x77\x3d\xcd\x30\x63\xb1\x70\x52\xa1\xc5\x94\x5f\xcf\xe8\xb8\x26\x98\xf7\x6\xa0\xa\x70\xfa\x3\x80\xac\xc1\xec\xd6\x4c\x54\xd7\xfe\x47\xb6\x88\x4a\xf7\x71\x24\xee\xf3\xd2\xc2\x4a\x7f\xfe\x61\xc7\x35\xc9\x37\x67\xcb\x24\x35\xda\x7e\xca\x5f\xf3\x8d\xd4\x13\x8e\xd6\xcb\x4d\x53\x8f\x53\x1f\xc0\x74\xf7\x53\xb9\x5e\x23\x37\xba\x6e\xe3\x9d\x7\x55\x25\x7b\xe6\x2a\x64\xd1\x32\xdd\x54\x1b\x4b\xc0\xe1\xd7\x69\x58\xf8\x93\x29\xc4\xdd\x23\x2f\xa5\xfc\x9d\x7e\xf8\xd4\x90\xcd\x82\x55\xdc\x16\x16\x9f\x7\x52\x9b\x9d\x25\xed\x32\xc5\x7b\xdf\xf6\x83\x46\x3d\x65\xb7\xef\x87\x7a\x12\x69\x8f\x6\x7c\x51\x15\x4a\x8\xe8\xac\x9a\xc\x24\xa7\x27\xd8\x46\x2f\xe7\x1\xe\x1c\xc6\x91\xb0\x6e\x85\x65\xf0\x29\xd\x2e\x6b\x3b\xfb\x4b\xdf\xe4\x80\x93\x3\x66\x46\x3e\x8a\x6e\xf3\x5e\x4d\x62\xe\x49\x5\xaf\xd4\xf8\x21\x20\x61\x1d\x39\x17\xf4\x61\x47\x95\xfb\x15\x2e\xb3\x4f\xd0\x5d\xf5\x7d\x40\xda\x90\x3c\x6b\xcb\x17\x0\x13\x3b\x64\x34\x1b\xf0\xf2\xe5\x3b\xb2\xc7\xd3\x5f\x3a\x44\xa6\x9b\xb7\x78\xe\x42\x5d\x4c\xc1\xe9\xd2\xcb\xb7\x78\xd1\xfe\x9a\xb5\x7\xe9\xe0\xbe\xe2\x8a\xa7\x1\x83\x0\x8c\x5c\x8\xe6\x63\x12\x92\xb7\xb7\xa6\x19\x7d\x38\x13\x38\x92\x87\x24\xf9\x48\xb3\x5e\x87\x6a\x40\x39\x5c\x3f\xed\x8f\xee\xdb\x15\x82\x6\xda\x49\x21\x2b\xb5\xbf\x32\x7c\x9f\x42\x28\x63\xcf\xaf\x1e\xf8\xc6\xa0\xd1\x2\x43\x57\x62\xec\x9b\xf\x1\x9e\x71\xd8\x87\x9d\x1\xc1\x58\x77\xd9\xaf\xb1\x10\x7e\xdd\xa6\x50\x96\xe5\xf0\x72\x0\x6d\x4b\xf8\x2a\x8f\x19\xf3\x22\x88\x11\x4a\x8b\x7c\xfd\xb7\xed\xe1\xf6\x40\x39\xe0\xe9\xf6\x3d\x25\xe6\x74\x3c\x58\x57\x7f\xe1\x22\x96\x47\x31\x91\xba\x70\x85\x28\x6b\x9f\x6e\x25\xac\x23\x66\x2f\x29\x88\x28\xce\x8c\x5c\x88\x53\xd1\x3b\xcc\x6a\x51\xb2\xe1\x28\x3f\x91\xb4\xd\x0\x3a\xe3\xf8\xc3\x8f\xd7\x96\x62\xe\x2e\xfc\xc8\x6c\x77\xa6\x1d\x22\xc1\xb8\xe6\x61\xd7\x67\x36\x13\x7b\xbb\x9b\x59\x9\xa6\xdf\xf7\x6b\xa3\x40\x1a\xf5\x4f\xb4\xda\xd3\xf3\x81\x93\xc6\x18\xd9\x26\xee\xac\xf0\xaa\xdf\xc5\x9c\xca\xc2\xa2\xcc\x7b\x5c\x24\xb0\xbc\xd0\x6a\x4d\x89\x9\xb8\x7\xfe\x87\xad\xa\xea\xb8\x42\xf9\x5e\xb3\x3e\x36\x4c\xaf\x75\x9e\x1c\xeb\xbd\xbc\xbb\x80\x40\xa7\x3a\x30\xbf\xa8\x44\xf4\xeb\x38\xad\x29\xba\x23\xed\x41\xc\xea\xd2\xbb\x41\x18\xd6\xb9\xba\x65\x2b\xa3\x91\x6d\x1f\xa9\xf4\xd1\x25\x8d\x4d\x38\xff\x64\xa0\xec\xde\xa6\xb6\x79\xab\x8e\x33\x6c\x47\xde\xaf\x94\xa4\xa5\x86\x77\x55\x9\x92\x81\x31\x76\xc7\x34\x22\x89\x8e\x3d\x26\x26\xd7\xfc\x1e\x16\x72\x13\x33\x63\xd5\x22\xbe\xb8\x4\x34\x84\x41\xbb\x80\xd0\x9f\x46\x48\x7\xa7\xfc\x2b\x3a\x75\x55\x8c\xc7\x6a\xbd\x7e\x46\x8\x84\xf\xd5\x74\xc0\x82\x8e\xaa\x61\x5\x1\xb2\x47\x6e\x20\x6a\x2d\x58\x70\x48\x32\xa7\x37\xd2\xb8\x82\x1a\x51\xb9\x61\xdd\xfd\x9d\x6b\xe\x18\x97\xf8\x45\x5f\x87\x10\xcf\x34\x72\x45\x26\x49\x70\xe7\xa3\x78\xe0\x52\x89\x84\x94\x83\x82\xc2\x69\x8f\xe3\xe1\x3f\x60\x74\x88\xc4\xf7\x75\x2c\xfb\xbd\xb6\xc4\x7e\x10\xa\x6c\x90\x4\x9e\xc3\x3f\x59\x7c\xce\x31\x18\x60\x57\x73\x46\x94\x7d\x6\xa0\x6d\x44\xec\xa2\xa\x9e\x5\x15\xef\xca\x5c\xbf\x0\xeb\xf7\x3d\x32\xd4\xa5\xef\x49\x89\x5e\x46\xb0\xa6\x63\x5b\x8a\x73\xae\x6f\xd5\x9d\xf8\x4f\x40\xb5\xb2\x6e\xd3\xb6\x1\xa9\x26\xa2\x21\xcf\x33\x7a\x3a\xa4\x23\x13\xb0\x69\x6a\xee\xce\xd8\x9d\x1\x1d\x50\xc1\x30\x6c\xb1\xcd\xa0\xf0\xf0\xa2\x64\x6f\xbb\xbf\x5e\xe6\xab\x87\xb4\xf\x4f\x15\xaf\xb5\x25\xa1\xb2\xd0\x80\x2c\xfb\xf9\xfe\xd2\x33\xbb\x76\xfe\x7c\xa8\x66\xf7\xe7\x85\x9f\x1f\x85\x57\x88\xe1\xe9\x63\xe4\xd8\x1c\xa1\xfb\xda\x44\x5\x2e\x1d\x3a\x1c\xff\xc8\x3b\xc0\xfe\xda\x22\xb\x43\xd6\x88\x39\x4c\x4a\xa6\x69\x18\x93\x42\x4e\xb5\xcc\x66\xd\x9\xf8\x1e\x7c\xd3\x3c\x99\xd\x50\x1d\x62\xe9\x57\x6\xbf\x19\x88\xdd\xad\x7b\x4f\xf9\xc7\x82\x6d\x8d\xc8\xc4\xc5\x78\x17\x20\x15\xc5\x52\x41\xcf\x5b\xd6\x7f\x94\x2\x41\xe0\x40\x22\x3\x5e\xd1\x53\xd4\x86\xd3\x2c\x9f\xf\x96\xe3\x6b\x9a\x76\x32\x6\x47\x4b\x11\xb3\xdd\x3\x65\xbd\x9b\x1\xda\x9c\xb9\x7e\x3f\x6a\xc4\x7b\xea\xd4\x3c\xb9\xfb\x5c\x6b\x64\x33\x52\xba\x64\x78\x8f\xa4\xaf\x7a\x61\x8d\xbc\xc5\x73\xe9\x6b\x58\x97\x4b\xbf\x63\x22\xd3\x37\x2\x54\xc5\xb9\x16\x4a\xf0\x19\xd8\x94\x57\xb8\x8a\xb3\x16\x3b\xd0\x84\x8e\x67\xa6\xa3\x7d\x78\xec\x0"
+} };
+
+static const struct chacha20poly1305_testvec xchacha20poly1305_enc_vectors[] __initconst = { {
+	.key	= "\x1c\x92\x40\xa5\xeb\x55\xd3\x8a\xf3\x33\x88\x86\x04\xf6\xb5\xf0\x47\x39\x17\xc1\x40\x2b\x80\x09\x9d\xca\x5c\xbc\x20\x70\x75\xc0",
+	.nonce	= "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17",
+	.assoc	= "\xf3\x33\x88\x86\x00\x00\x00\x00\x00\x00\x4e\x91",
+	.alen	= 12,
+	.input	= "\x49\x6e\x74\x65\x72\x6e\x65\x74\x2d\x44\x72\x61\x66\x74\x73\x20\x61\x72\x65\x20\x64\x72\x61\x66\x74\x20\x64\x6f\x63\x75\x6d\x65\x6e\x74\x73\x20\x76\x61\x6c\x69\x64\x20\x66\x6f\x72\x20\x61\x20\x6d\x61\x78\x69\x6d\x75\x6d\x20\x6f\x66\x20\x73\x69\x78\x20\x6d\x6f\x6e\x74\x68\x73\x20\x61\x6e\x64\x20\x6d\x61\x79\x20\x62\x65\x20\x75\x70\x64\x61\x74\x65\x64\x2c\x20\x72\x65\x70\x6c\x61\x63\x65\x64\x2c\x20\x6f\x72\x20\x6f\x62\x73\x6f\x6c\x65\x74\x65\x64\x20\x62\x79\x20\x6f\x74\x68\x65\x72\x20\x64\x6f\x63\x75\x6d\x65\x6e\x74\x73\x20\x61\x74\x20\x61\x6e\x79\x20\x74\x69\x6d\x65\x2e\x20\x49\x74\x20\x69\x73\x20\x69\x6e\x61\x70\x70\x72\x6f\x70\x72\x69\x61\x74\x65\x20\x74\x6f\x20\x75\x73\x65\x20\x49\x6e\x74\x65\x72\x6e\x65\x74\x2d\x44\x72\x61\x66\x74\x73\x20\x61\x73\x20\x72\x65\x66\x65\x72\x65\x6e\x63\x65\x20\x6d\x61\x74\x65\x72\x69\x61\x6c\x20\x6f\x72\x20\x74\x6f\x20\x63\x69\x74\x65\x20\x74\x68\x65\x6d\x20\x6f\x74\x68\x65\x72\x20\x74\x68\x61\x6e\x20\x61\x73\x20\x2f\xe2\x80\x9c\x77\x6f\x72\x6b\x20\x69\x6e\x20\x70\x72\x6f\x67\x72\x65\x73\x73\x2e\x2f\xe2\x80\x9d",
+	.ilen	= 265,
+	.result	= "\x1a\x6e\x3a\xd9\xfd\x41\x3f\x77\x54\x72\x0a\x70\x9a\xa0\x29\x92\x2e\xed\x93\xcf\x0f\x71\x88\x18\x7a\x9d\x2d\x24\xe0\xf5\xea\x3d\x55\x64\xd7\xad\x2a\x1a\x1f\x7e\x86\x6d\xb0\xce\x80\x41\x72\x86\x26\xee\x84\xd7\xef\x82\x9e\xe2\x60\x9d\x5a\xfc\xf0\xe4\x19\x85\xea\x09\xc6\xfb\xb3\xa9\x50\x09\xec\x5e\x11\x90\xa1\xc5\x4e\x49\xef\x50\xd8\x8f\xe0\x78\xd7\xfd\xb9\x3b\xc9\xf2\x91\xc8\x25\xc8\xa7\x63\x60\xce\x10\xcd\xc6\x7f\xf8\x16\xf8\xe1\x0a\xd9\xde\x79\x50\x33\xf2\x16\x0f\x17\xba\xb8\x5d\xd8\xdf\x4e\x51\xa8\x39\xd0\x85\xca\x46\x6a\x10\xa7\xa3\x88\xef\x79\xb9\xf8\x24\xf3\xe0\x71\x7b\x76\x28\x46\x3a\x3a\x1b\x91\xb6\xd4\x3e\x23\xe5\x44\x15\xbf\x60\x43\x9d\xa4\xbb\xd5\x5f\x89\xeb\xef\x8e\xfd\xdd\xb4\x0d\x46\xf0\x69\x23\x63\xae\x94\xf5\x5e\xa5\xad\x13\x1c\x41\x76\xe6\x90\xd6\x6d\xa2\x8f\x97\x4c\xa8\x0b\xcf\x8d\x43\x2b\x9c\x9b\xc5\x58\xa5\xb6\x95\x9a\xbf\x81\xc6\x54\xc9\x66\x0c\xe5\x4f\x6a\x53\xa1\xe5\x0c\xba\x31\xde\x34\x64\x73\x8a\x3b\xbd\x92\x01\xdb\x71\x69\xf3\x58\x99\xbc\xd1\xcb\x4a\x05\xe2\x58\x9c\x25\x17\xcd\xdc\x83\xb7\xff\xfb\x09\x61\xad\xbf\x13\x5b\x5e\xed\x46\x82\x6f\x22\xd8\x93\xa6\x85\x5b\x40\x39\x5c\xc5\x9c"
+} };
+static const struct chacha20poly1305_testvec xchacha20poly1305_dec_vectors[] __initconst = {	{
+	.key	= "\x1c\x92\x40\xa5\xeb\x55\xd3\x8a\xf3\x33\x88\x86\x04\xf6\xb5\xf0\x47\x39\x17\xc1\x40\x2b\x80\x09\x9d\xca\x5c\xbc\x20\x70\x75\xc0",
+	.nonce	= "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17",
+	.assoc	= "\xf3\x33\x88\x86\x00\x00\x00\x00\x00\x00\x4e\x91",
+	.alen	= 12,
+	.input	= "\x1a\x6e\x3a\xd9\xfd\x41\x3f\x77\x54\x72\x0a\x70\x9a\xa0\x29\x92\x2e\xed\x93\xcf\x0f\x71\x88\x18\x7a\x9d\x2d\x24\xe0\xf5\xea\x3d\x55\x64\xd7\xad\x2a\x1a\x1f\x7e\x86\x6d\xb0\xce\x80\x41\x72\x86\x26\xee\x84\xd7\xef\x82\x9e\xe2\x60\x9d\x5a\xfc\xf0\xe4\x19\x85\xea\x09\xc6\xfb\xb3\xa9\x50\x09\xec\x5e\x11\x90\xa1\xc5\x4e\x49\xef\x50\xd8\x8f\xe0\x78\xd7\xfd\xb9\x3b\xc9\xf2\x91\xc8\x25\xc8\xa7\x63\x60\xce\x10\xcd\xc6\x7f\xf8\x16\xf8\xe1\x0a\xd9\xde\x79\x50\x33\xf2\x16\x0f\x17\xba\xb8\x5d\xd8\xdf\x4e\x51\xa8\x39\xd0\x85\xca\x46\x6a\x10\xa7\xa3\x88\xef\x79\xb9\xf8\x24\xf3\xe0\x71\x7b\x76\x28\x46\x3a\x3a\x1b\x91\xb6\xd4\x3e\x23\xe5\x44\x15\xbf\x60\x43\x9d\xa4\xbb\xd5\x5f\x89\xeb\xef\x8e\xfd\xdd\xb4\x0d\x46\xf0\x69\x23\x63\xae\x94\xf5\x5e\xa5\xad\x13\x1c\x41\x76\xe6\x90\xd6\x6d\xa2\x8f\x97\x4c\xa8\x0b\xcf\x8d\x43\x2b\x9c\x9b\xc5\x58\xa5\xb6\x95\x9a\xbf\x81\xc6\x54\xc9\x66\x0c\xe5\x4f\x6a\x53\xa1\xe5\x0c\xba\x31\xde\x34\x64\x73\x8a\x3b\xbd\x92\x01\xdb\x71\x69\xf3\x58\x99\xbc\xd1\xcb\x4a\x05\xe2\x58\x9c\x25\x17\xcd\xdc\x83\xb7\xff\xfb\x09\x61\xad\xbf\x13\x5b\x5e\xed\x46\x82\x6f\x22\xd8\x93\xa6\x85\x5b\x40\x39\x5c\xc5\x9c",
+	.ilen	= 281,
+	.result	= "\x49\x6e\x74\x65\x72\x6e\x65\x74\x2d\x44\x72\x61\x66\x74\x73\x20\x61\x72\x65\x20\x64\x72\x61\x66\x74\x20\x64\x6f\x63\x75\x6d\x65\x6e\x74\x73\x20\x76\x61\x6c\x69\x64\x20\x66\x6f\x72\x20\x61\x20\x6d\x61\x78\x69\x6d\x75\x6d\x20\x6f\x66\x20\x73\x69\x78\x20\x6d\x6f\x6e\x74\x68\x73\x20\x61\x6e\x64\x20\x6d\x61\x79\x20\x62\x65\x20\x75\x70\x64\x61\x74\x65\x64\x2c\x20\x72\x65\x70\x6c\x61\x63\x65\x64\x2c\x20\x6f\x72\x20\x6f\x62\x73\x6f\x6c\x65\x74\x65\x64\x20\x62\x79\x20\x6f\x74\x68\x65\x72\x20\x64\x6f\x63\x75\x6d\x65\x6e\x74\x73\x20\x61\x74\x20\x61\x6e\x79\x20\x74\x69\x6d\x65\x2e\x20\x49\x74\x20\x69\x73\x20\x69\x6e\x61\x70\x70\x72\x6f\x70\x72\x69\x61\x74\x65\x20\x74\x6f\x20\x75\x73\x65\x20\x49\x6e\x74\x65\x72\x6e\x65\x74\x2d\x44\x72\x61\x66\x74\x73\x20\x61\x73\x20\x72\x65\x66\x65\x72\x65\x6e\x63\x65\x20\x6d\x61\x74\x65\x72\x69\x61\x6c\x20\x6f\x72\x20\x74\x6f\x20\x63\x69\x74\x65\x20\x74\x68\x65\x6d\x20\x6f\x74\x68\x65\x72\x20\x74\x68\x61\x6e\x20\x61\x73\x20\x2f\xe2\x80\x9c\x77\x6f\x72\x6b\x20\x69\x6e\x20\x70\x72\x6f\x67\x72\x65\x73\x73\x2e\x2f\xe2\x80\x9d"
+} };
+
+bool __init chacha20poly1305_selftest(void)
+{
+	size_t i;
+	u8 computed_result[3000];
+	bool success = true, ret;
+
+	for (i = 0; i < ARRAY_SIZE(chacha20poly1305_enc_vectors); ++i) {
+		memset(computed_result, 0, sizeof(computed_result));
+		chacha20poly1305_encrypt(computed_result, chacha20poly1305_enc_vectors[i].input, chacha20poly1305_enc_vectors[i].ilen, chacha20poly1305_enc_vectors[i].assoc, chacha20poly1305_enc_vectors[i].alen, le64_to_cpu(*(__force __le64 *)chacha20poly1305_enc_vectors[i].nonce), chacha20poly1305_enc_vectors[i].key);
+		if (memcmp(computed_result, chacha20poly1305_enc_vectors[i].result, chacha20poly1305_enc_vectors[i].ilen + POLY1305_MAC_SIZE)) {
+			pr_info("chacha20poly1305 encryption self-test %zu: FAIL\n", i + 1);
+			success = false;
+		}
+	}
+	for (i = 0; i < ARRAY_SIZE(chacha20poly1305_dec_vectors); ++i) {
+		memset(computed_result, 0, sizeof(computed_result));
+		ret = chacha20poly1305_decrypt(computed_result, chacha20poly1305_dec_vectors[i].input, chacha20poly1305_dec_vectors[i].ilen, chacha20poly1305_dec_vectors[i].assoc, chacha20poly1305_dec_vectors[i].alen, le64_to_cpu(*(__force __le64 *)chacha20poly1305_dec_vectors[i].nonce), chacha20poly1305_dec_vectors[i].key);
+		if (!ret || memcmp(computed_result, chacha20poly1305_dec_vectors[i].result, chacha20poly1305_dec_vectors[i].ilen - POLY1305_MAC_SIZE)) {
+			pr_info("chacha20poly1305 decryption self-test %zu: FAIL\n", i + 1);
+			success = false;
+		}
+	}
+	for (i = 0; i < ARRAY_SIZE(xchacha20poly1305_enc_vectors); ++i) {
+		memset(computed_result, 0, sizeof(computed_result));
+		xchacha20poly1305_encrypt(computed_result, xchacha20poly1305_enc_vectors[i].input, xchacha20poly1305_enc_vectors[i].ilen, xchacha20poly1305_enc_vectors[i].assoc, xchacha20poly1305_enc_vectors[i].alen, xchacha20poly1305_enc_vectors[i].nonce, xchacha20poly1305_enc_vectors[i].key);
+		if (memcmp(computed_result, xchacha20poly1305_enc_vectors[i].result, xchacha20poly1305_enc_vectors[i].ilen + POLY1305_MAC_SIZE)) {
+			pr_info("xchacha20poly1305 encryption self-test %zu: FAIL\n", i + 1);
+			success = false;
+		}
+	}
+	for (i = 0; i < ARRAY_SIZE(xchacha20poly1305_dec_vectors); ++i) {
+		memset(computed_result, 0, sizeof(computed_result));
+		ret = xchacha20poly1305_decrypt(computed_result, xchacha20poly1305_dec_vectors[i].input, xchacha20poly1305_dec_vectors[i].ilen, xchacha20poly1305_dec_vectors[i].assoc, xchacha20poly1305_dec_vectors[i].alen, xchacha20poly1305_dec_vectors[i].nonce, xchacha20poly1305_dec_vectors[i].key);
+		if (!ret || memcmp(computed_result, xchacha20poly1305_dec_vectors[i].result, xchacha20poly1305_dec_vectors[i].ilen - POLY1305_MAC_SIZE)) {
+			pr_info("xchacha20poly1305 decryption self-test %zu: FAIL\n", i + 1);
+			success = false;
+		}
+	}
+	if (success)
+		pr_info("chacha20poly1305 self-tests: pass\n");
+	return success;
+}
+#endif
diff --git b/net/wireguard/selftest/counter.h b/net/wireguard/selftest/counter.h
new file mode 100644
index 0000000..f066974
--- /dev/null
+++ b/net/wireguard/selftest/counter.h
@@ -0,0 +1,92 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#ifdef DEBUG
+bool __init packet_counter_selftest(void)
+{
+	bool success = true;
+	unsigned int test_num = 0, i;
+	union noise_counter counter;
+
+#define T_INIT do { memset(&counter, 0, sizeof(union noise_counter)); spin_lock_init(&counter.receive.lock); } while (0)
+#define T_LIM (COUNTER_WINDOW_SIZE + 1)
+#define T(n, v) do { ++test_num; if (counter_validate(&counter, n) != v) { pr_info("nonce counter self-test %u: FAIL\n", test_num); success = false; } } while (0)
+	T_INIT;
+	/*  1 */ T(0, true);
+	/*  2 */ T(1, true);
+	/*  3 */ T(1, false);
+	/*  4 */ T(9, true);
+	/*  5 */ T(8, true);
+	/*  6 */ T(7, true);
+	/*  7 */ T(7, false);
+	/*  8 */ T(T_LIM, true);
+	/*  9 */ T(T_LIM - 1, true);
+	/* 10 */ T(T_LIM - 1, false);
+	/* 11 */ T(T_LIM - 2, true);
+	/* 12 */ T(2, true);
+	/* 13 */ T(2, false);
+	/* 14 */ T(T_LIM + 16, true);
+	/* 15 */ T(3, false);
+	/* 16 */ T(T_LIM + 16, false);
+	/* 17 */ T(T_LIM * 4, true);
+	/* 18 */ T(T_LIM * 4 - (T_LIM - 1), true);
+	/* 19 */ T(10, false);
+	/* 20 */ T(T_LIM * 4 - T_LIM, false);
+	/* 21 */ T(T_LIM * 4 - (T_LIM + 1), false);
+	/* 22 */ T(T_LIM * 4 - (T_LIM - 2), true);
+	/* 23 */ T(T_LIM * 4 + 1 - T_LIM, false);
+	/* 24 */ T(0, false);
+	/* 25 */ T(REJECT_AFTER_MESSAGES, false);
+	/* 26 */ T(REJECT_AFTER_MESSAGES - 1, true);
+	/* 27 */ T(REJECT_AFTER_MESSAGES, false);
+	/* 28 */ T(REJECT_AFTER_MESSAGES - 1, false);
+	/* 29 */ T(REJECT_AFTER_MESSAGES - 2, true);
+	/* 30 */ T(REJECT_AFTER_MESSAGES + 1, false);
+	/* 31 */ T(REJECT_AFTER_MESSAGES + 2, false);
+	/* 32 */ T(REJECT_AFTER_MESSAGES - 2, false);
+	/* 33 */ T(REJECT_AFTER_MESSAGES - 3, true);
+	/* 34 */ T(0, false);
+
+	T_INIT;
+	for (i = 1; i <= COUNTER_WINDOW_SIZE; ++i)
+		T(i, true);
+	T(0, true);
+	T(0, false);
+
+	T_INIT;
+	for (i = 2; i <= COUNTER_WINDOW_SIZE + 1; ++i)
+		T(i, true);
+	T(1, true);
+	T(0, false);
+
+	T_INIT;
+	for (i = COUNTER_WINDOW_SIZE + 1; i-- > 0 ;)
+		T(i, true);
+
+	T_INIT;
+	for (i = COUNTER_WINDOW_SIZE + 2; i-- > 1 ;)
+		T(i, true);
+	T(0, false);
+
+	T_INIT;
+	for (i = COUNTER_WINDOW_SIZE + 1; i-- > 1 ;)
+		T(i, true);
+	T(COUNTER_WINDOW_SIZE + 1, true);
+	T(0, false);
+
+	T_INIT;
+	for (i = COUNTER_WINDOW_SIZE + 1; i-- > 1 ;)
+		T(i, true);
+	T(0, true);
+	T(COUNTER_WINDOW_SIZE + 1, true);
+#undef T
+#undef T_LIM
+#undef T_INIT
+
+	if (success)
+		pr_info("nonce counter self-tests: pass\n");
+	return success;
+}
+#endif
diff --git b/net/wireguard/selftest/curve25519.h b/net/wireguard/selftest/curve25519.h
new file mode 100644
index 0000000..e62e2fc
--- /dev/null
+++ b/net/wireguard/selftest/curve25519.h
@@ -0,0 +1,77 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#ifdef DEBUG
+struct curve25519_test_vector {
+	u8 private[CURVE25519_POINT_SIZE];
+	u8 public[CURVE25519_POINT_SIZE];
+	u8 result[CURVE25519_POINT_SIZE];
+	bool valid;
+};
+static const struct curve25519_test_vector curve25519_test_vectors[] __initconst = {
+	{
+		.private = { 0x77, 0x07, 0x6d, 0x0a, 0x73, 0x18, 0xa5, 0x7d, 0x3c, 0x16, 0xc1, 0x72, 0x51, 0xb2, 0x66, 0x45, 0xdf, 0x4c, 0x2f, 0x87, 0xeb, 0xc0, 0x99, 0x2a, 0xb1, 0x77, 0xfb, 0xa5, 0x1d, 0xb9, 0x2c, 0x2a },
+		.public = { 0xde, 0x9e, 0xdb, 0x7d, 0x7b, 0x7d, 0xc1, 0xb4, 0xd3, 0x5b, 0x61, 0xc2, 0xec, 0xe4, 0x35, 0x37, 0x3f, 0x83, 0x43, 0xc8, 0x5b, 0x78, 0x67, 0x4d, 0xad, 0xfc, 0x7e, 0x14, 0x6f, 0x88, 0x2b, 0x4f },
+		.result = { 0x4a, 0x5d, 0x9d, 0x5b, 0xa4, 0xce, 0x2d, 0xe1, 0x72, 0x8e, 0x3b, 0xf4, 0x80, 0x35, 0x0f, 0x25, 0xe0, 0x7e, 0x21, 0xc9, 0x47, 0xd1, 0x9e, 0x33, 0x76, 0xf0, 0x9b, 0x3c, 0x1e, 0x16, 0x17, 0x42 },
+		.valid = true
+	},
+	{
+		.private = { 0x5d, 0xab, 0x08, 0x7e, 0x62, 0x4a, 0x8a, 0x4b, 0x79, 0xe1, 0x7f, 0x8b, 0x83, 0x80, 0x0e, 0xe6, 0x6f, 0x3b, 0xb1, 0x29, 0x26, 0x18, 0xb6, 0xfd, 0x1c, 0x2f, 0x8b, 0x27, 0xff, 0x88, 0xe0, 0xeb },
+		.public = { 0x85, 0x20, 0xf0, 0x09, 0x89, 0x30, 0xa7, 0x54, 0x74, 0x8b, 0x7d, 0xdc, 0xb4, 0x3e, 0xf7, 0x5a, 0x0d, 0xbf, 0x3a, 0x0d, 0x26, 0x38, 0x1a, 0xf4, 0xeb, 0xa4, 0xa9, 0x8e, 0xaa, 0x9b, 0x4e, 0x6a },
+		.result = { 0x4a, 0x5d, 0x9d, 0x5b, 0xa4, 0xce, 0x2d, 0xe1, 0x72, 0x8e, 0x3b, 0xf4, 0x80, 0x35, 0x0f, 0x25, 0xe0, 0x7e, 0x21, 0xc9, 0x47, 0xd1, 0x9e, 0x33, 0x76, 0xf0, 0x9b, 0x3c, 0x1e, 0x16, 0x17, 0x42 },
+		.valid = true
+	},
+	{
+		.private = { 1 },
+		.public = { 0x25, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+		.result = { 0x3c, 0x77, 0x77, 0xca, 0xf9, 0x97, 0xb2, 0x64, 0x41, 0x60, 0x77, 0x66, 0x5b, 0x4e, 0x22, 0x9d, 0xb, 0x95, 0x48, 0xdc, 0xc, 0xd8, 0x19, 0x98, 0xdd, 0xcd, 0xc5, 0xc8, 0x53, 0x3c, 0x79, 0x7f },
+		.valid = true
+	},
+	{
+		.private = { 1 },
+		.public = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
+		.result = { 0xb3, 0x2d, 0x13, 0x62, 0xc2, 0x48, 0xd6, 0x2f, 0xe6, 0x26, 0x19, 0xcf, 0xf0, 0x4d, 0xd4, 0x3d, 0xb7, 0x3f, 0xfc, 0x1b, 0x63, 0x8, 0xed, 0xe3, 0xb, 0x78, 0xd8, 0x73, 0x80, 0xf1, 0xe8, 0x34 },
+		.valid = true
+	},
+	{
+		.private = { 0xa5, 0x46, 0xe3, 0x6b, 0xf0, 0x52, 0x7c, 0x9d, 0x3b, 0x16, 0x15, 0x4b, 0x82, 0x46, 0x5e, 0xdd, 0x62, 0x14, 0x4c, 0x0a, 0xc1, 0xfc, 0x5a, 0x18, 0x50, 0x6a, 0x22, 0x44, 0xba, 0x44, 0x9a, 0xc4 },
+		.public = { 0xe6, 0xdb, 0x68, 0x67, 0x58, 0x30, 0x30, 0xdb, 0x35, 0x94, 0xc1, 0xa4, 0x24, 0xb1, 0x5f, 0x7c, 0x72, 0x66, 0x24, 0xec, 0x26, 0xb3, 0x35, 0x3b, 0x10, 0xa9, 0x03, 0xa6, 0xd0, 0xab, 0x1c, 0x4c },
+		.result = { 0xc3, 0xda, 0x55, 0x37, 0x9d, 0xe9, 0xc6, 0x90, 0x8e, 0x94, 0xea, 0x4d, 0xf2, 0x8d, 0x08, 0x4f, 0x32, 0xec, 0xcf, 0x03, 0x49, 0x1c, 0x71, 0xf7, 0x54, 0xb4, 0x07, 0x55, 0x77, 0xa2, 0x85, 0x52 },
+		.valid = true
+	},
+	{
+		.private = { 1, 2, 3, 4 },
+		.public = { 0 },
+		.result = { 0 },
+		.valid = false
+	},
+	{
+		.private = { 2, 4, 6, 8 },
+		.public = { 0xe0, 0xeb, 0x7a, 0x7c, 0x3b, 0x41, 0xb8, 0xae, 0x16, 0x56, 0xe3, 0xfa, 0xf1, 0x9f, 0xc4, 0x6a, 0xda, 0x09, 0x8d, 0xeb, 0x9c, 0x32, 0xb1, 0xfd, 0x86, 0x62, 0x05, 0x16, 0x5f, 0x49, 0xb8 },
+		.result = { 0 },
+		.valid = false
+	}
+};
+bool __init curve25519_selftest(void)
+{
+	bool success = true, ret;
+	size_t i = 0;
+	u8 out[CURVE25519_POINT_SIZE];
+
+	for (i = 0; i < ARRAY_SIZE(curve25519_test_vectors); ++i) {
+		memset(out, 0, CURVE25519_POINT_SIZE);
+		ret = curve25519(out, curve25519_test_vectors[i].private, curve25519_test_vectors[i].public);
+		if (ret != curve25519_test_vectors[i].valid || memcmp(out, curve25519_test_vectors[i].result, CURVE25519_POINT_SIZE)) {
+			pr_info("curve25519 self-test %zu: FAIL\n", i + 1);
+			success = false;
+			break;
+		}
+	}
+
+	if (success)
+		pr_info("curve25519 self-tests: pass\n");
+	return success;
+}
+#endif
diff --git b/net/wireguard/selftest/ratelimiter.h b/net/wireguard/selftest/ratelimiter.h
new file mode 100644
index 0000000..aa5db38
--- /dev/null
+++ b/net/wireguard/selftest/ratelimiter.h
@@ -0,0 +1,157 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#ifdef DEBUG
+
+#include <linux/jiffies.h>
+
+static const struct { bool result; unsigned int msec_to_sleep_before; } expected_results[] __initconst = {
+	[0 ... PACKETS_BURSTABLE - 1] = { true, 0 },
+	[PACKETS_BURSTABLE] = { false, 0 },
+	[PACKETS_BURSTABLE + 1] = { true, MSEC_PER_SEC / PACKETS_PER_SECOND },
+	[PACKETS_BURSTABLE + 2] = { false, 0 },
+	[PACKETS_BURSTABLE + 3] = { true, (MSEC_PER_SEC / PACKETS_PER_SECOND) * 2 },
+	[PACKETS_BURSTABLE + 4] = { true, 0 },
+	[PACKETS_BURSTABLE + 5] = { false, 0 }
+};
+
+static __init unsigned int maximum_jiffies_at_index(int index)
+{
+	unsigned int total_msecs = 2 * MSEC_PER_SEC / PACKETS_PER_SECOND / 3;
+	int i;
+
+	for (i = 0; i <= index; ++i)
+		total_msecs += expected_results[i].msec_to_sleep_before;
+	return msecs_to_jiffies(total_msecs);
+}
+
+bool __init ratelimiter_selftest(void)
+{
+	struct sk_buff *skb4;
+	struct iphdr *hdr4;
+#if IS_ENABLED(CONFIG_IPV6)
+	struct sk_buff *skb6;
+	struct ipv6hdr *hdr6;
+#endif
+	int i, test = 0, tries = 0, ret = false;
+	unsigned long loop_start_time;
+
+	BUILD_BUG_ON(MSEC_PER_SEC % PACKETS_PER_SECOND != 0);
+
+	if (ratelimiter_init())
+		goto out;
+	++test;
+	if (ratelimiter_init()) {
+		ratelimiter_uninit();
+		goto out;
+	}
+	++test;
+	if (ratelimiter_init()) {
+		ratelimiter_uninit();
+		ratelimiter_uninit();
+		goto out;
+	}
+	++test;
+
+	skb4 = alloc_skb(sizeof(struct iphdr), GFP_KERNEL);
+	if (!skb4)
+		goto err_nofree;
+	skb4->protocol = htons(ETH_P_IP);
+	hdr4 = (struct iphdr *)skb_put(skb4, sizeof(struct iphdr));
+	hdr4->saddr = htonl(8182);
+	skb_reset_network_header(skb4);
+	++test;
+
+#if IS_ENABLED(CONFIG_IPV6)
+	skb6 = alloc_skb(sizeof(struct ipv6hdr), GFP_KERNEL);
+	if (!skb6) {
+		kfree_skb(skb4);
+		goto err_nofree;
+	}
+	skb6->protocol = htons(ETH_P_IPV6);
+	hdr6 = (struct ipv6hdr *)skb_put(skb6, sizeof(struct ipv6hdr));
+	hdr6->saddr.in6_u.u6_addr32[0] = htonl(1212);
+	hdr6->saddr.in6_u.u6_addr32[1] = htonl(289188);
+	skb_reset_network_header(skb6);
+	++test;
+#endif
+
+restart:
+	loop_start_time = jiffies;
+	for (i = 0; i < ARRAY_SIZE(expected_results); ++i) {
+#define ensure_time do {\
+		if (time_is_before_jiffies(loop_start_time + maximum_jiffies_at_index(i))) { \
+			if (++tries >= 5000) \
+				goto err; \
+			gc_entries(NULL); \
+			rcu_barrier(); \
+			msleep(500); \
+			goto restart; \
+		}} while (0)
+
+		if (expected_results[i].msec_to_sleep_before)
+			msleep(expected_results[i].msec_to_sleep_before);
+
+		ensure_time;
+		if (ratelimiter_allow(skb4, &init_net) != expected_results[i].result)
+			goto err;
+		++test;
+		hdr4->saddr = htonl(ntohl(hdr4->saddr) + i + 1);
+		ensure_time;
+		if (!ratelimiter_allow(skb4, &init_net))
+			goto err;
+		++test;
+		hdr4->saddr = htonl(ntohl(hdr4->saddr) - i - 1);
+
+#if IS_ENABLED(CONFIG_IPV6)
+		hdr6->saddr.in6_u.u6_addr32[2] = hdr6->saddr.in6_u.u6_addr32[3] = htonl(i);
+		ensure_time;
+		if (ratelimiter_allow(skb6, &init_net) != expected_results[i].result)
+			goto err;
+		++test;
+		hdr6->saddr.in6_u.u6_addr32[0] = htonl(ntohl(hdr6->saddr.in6_u.u6_addr32[0]) + i + 1);
+		ensure_time;
+		if (!ratelimiter_allow(skb6, &init_net))
+			goto err;
+		++test;
+		hdr6->saddr.in6_u.u6_addr32[0] = htonl(ntohl(hdr6->saddr.in6_u.u6_addr32[0]) - i - 1);
+		ensure_time;
+#endif
+	}
+
+	gc_entries(NULL);
+	rcu_barrier();
+
+	if (atomic_read(&total_entries))
+		goto err;
+	++test;
+
+	for (i = 0; i <= max_entries; ++i) {
+		hdr4->saddr = htonl(i);
+		if (ratelimiter_allow(skb4, &init_net) != (i != max_entries))
+			goto err;
+		++test;
+	}
+
+	ret = true;
+
+err:
+	kfree_skb(skb4);
+#if IS_ENABLED(CONFIG_IPV6)
+	kfree_skb(skb6);
+#endif
+err_nofree:
+	ratelimiter_uninit();
+	ratelimiter_uninit();
+	ratelimiter_uninit();
+out:
+	if (ret)
+		pr_info("ratelimiter self-tests: pass\n");
+	else
+		pr_info("ratelimiter self-test %d: fail\n", test);
+
+	return ret;
+}
+#endif
diff --git b/net/wireguard/send.c b/net/wireguard/send.c
new file mode 100644
index 0000000..01e6229
--- /dev/null
+++ b/net/wireguard/send.c
@@ -0,0 +1,348 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#include "queueing.h"
+#include "timers.h"
+#include "device.h"
+#include "peer.h"
+#include "socket.h"
+#include "messages.h"
+#include "cookie.h"
+
+#include <linux/uio.h>
+#include <linux/inetdevice.h>
+#include <linux/socket.h>
+#include <linux/jiffies.h>
+#include <net/ip_tunnels.h>
+#include <net/udp.h>
+#include <net/sock.h>
+
+static void packet_send_handshake_initiation(struct wireguard_peer *peer)
+{
+	struct message_handshake_initiation packet;
+
+	down_write(&peer->handshake.lock);
+	if (!time_is_before_jiffies64(peer->last_sent_handshake + REKEY_TIMEOUT)) {
+		up_write(&peer->handshake.lock);
+		return; /* This function is rate limited. */
+	}
+	peer->last_sent_handshake = get_jiffies_64();
+	up_write(&peer->handshake.lock);
+
+	net_dbg_ratelimited("%s: Sending handshake initiation to peer %llu (%pISpfsc)\n", peer->device->dev->name, peer->internal_id, &peer->endpoint.addr);
+
+	if (noise_handshake_create_initiation(&packet, &peer->handshake)) {
+		cookie_add_mac_to_packet(&packet, sizeof(packet), peer);
+		timers_any_authenticated_packet_traversal(peer);
+		socket_send_buffer_to_peer(peer, &packet, sizeof(struct message_handshake_initiation), HANDSHAKE_DSCP);
+		timers_handshake_initiated(peer);
+	}
+}
+
+void packet_handshake_send_worker(struct work_struct *work)
+{
+	struct wireguard_peer *peer = container_of(work, struct wireguard_peer, transmit_handshake_work);
+
+	packet_send_handshake_initiation(peer);
+	peer_put(peer);
+}
+
+void packet_send_queued_handshake_initiation(struct wireguard_peer *peer, bool is_retry)
+{
+	if (!is_retry)
+		peer->timer_handshake_attempts = 0;
+
+	/* First checking the timestamp here is just an optimization; it will
+	 * be caught while properly locked inside the actual work queue.
+	 */
+	if (!time_is_before_jiffies64(peer->last_sent_handshake + REKEY_TIMEOUT))
+		return;
+
+	peer = peer_rcu_get(peer);
+	/* Queues up calling packet_send_queued_handshakes(peer), where we do a peer_put(peer) after: */
+	if (!queue_work(peer->device->handshake_send_wq, &peer->transmit_handshake_work))
+		peer_put(peer); /* If the work was already queued, we want to drop the extra reference */
+}
+
+void packet_send_handshake_response(struct wireguard_peer *peer)
+{
+	struct message_handshake_response packet;
+
+	net_dbg_ratelimited("%s: Sending handshake response to peer %llu (%pISpfsc)\n", peer->device->dev->name, peer->internal_id, &peer->endpoint.addr);
+	peer->last_sent_handshake = get_jiffies_64();
+
+	if (noise_handshake_create_response(&packet, &peer->handshake)) {
+		cookie_add_mac_to_packet(&packet, sizeof(packet), peer);
+		if (noise_handshake_begin_session(&peer->handshake, &peer->keypairs)) {
+			timers_session_derived(peer);
+			timers_any_authenticated_packet_traversal(peer);
+			socket_send_buffer_to_peer(peer, &packet, sizeof(struct message_handshake_response), HANDSHAKE_DSCP);
+		}
+	}
+}
+
+void packet_send_handshake_cookie(struct wireguard_device *wg, struct sk_buff *initiating_skb, __le32 sender_index)
+{
+	struct message_handshake_cookie packet;
+
+	net_dbg_skb_ratelimited("%s: Sending cookie response for denied handshake message for %pISpfsc\n", wg->dev->name, initiating_skb);
+	cookie_message_create(&packet, initiating_skb, sender_index, &wg->cookie_checker);
+	socket_send_buffer_as_reply_to_skb(wg, initiating_skb, &packet, sizeof(packet));
+}
+
+static inline void keep_key_fresh(struct wireguard_peer *peer)
+{
+	struct noise_keypair *keypair;
+	bool send = false;
+
+	rcu_read_lock_bh();
+	keypair = rcu_dereference_bh(peer->keypairs.current_keypair);
+	if (likely(keypair && keypair->sending.is_valid) &&
+	   (unlikely(atomic64_read(&keypair->sending.counter.counter) > REKEY_AFTER_MESSAGES) ||
+	   (keypair->i_am_the_initiator && unlikely(time_is_before_eq_jiffies64(keypair->sending.birthdate + REKEY_AFTER_TIME)))))
+		send = true;
+	rcu_read_unlock_bh();
+
+	if (send)
+		packet_send_queued_handshake_initiation(peer, false);
+}
+
+static inline unsigned int skb_padding(struct sk_buff *skb)
+{
+	/* We do this modulo business with the MTU, just in case the networking layer
+	 * gives us a packet that's bigger than the MTU. Since we support GSO, this
+	 * isn't strictly neccessary, but it's better to be cautious here, especially
+	 * if that code ever changes.
+	 */
+	unsigned int last_unit = skb->len % skb->dev->mtu;
+	unsigned int padded_size = (last_unit + MESSAGE_PADDING_MULTIPLE - 1) & ~(MESSAGE_PADDING_MULTIPLE - 1);
+
+	if (padded_size > skb->dev->mtu)
+		padded_size = skb->dev->mtu;
+	return padded_size - last_unit;
+}
+
+static inline bool skb_encrypt(struct sk_buff *skb, struct noise_keypair *keypair, bool have_simd)
+{
+	struct scatterlist sg[MAX_SKB_FRAGS * 2 + 1];
+	struct message_data *header;
+	unsigned int padding_len, plaintext_len, trailer_len;
+	int num_frags;
+	struct sk_buff *trailer;
+
+	/* Calculate lengths */
+	padding_len = skb_padding(skb);
+	trailer_len = padding_len + noise_encrypted_len(0);
+	plaintext_len = skb->len + padding_len;
+
+	/* Expand data section to have room for padding and auth tag */
+	num_frags = skb_cow_data(skb, trailer_len, &trailer);
+	if (unlikely(num_frags < 0 || num_frags > ARRAY_SIZE(sg)))
+		return false;
+
+	/* Set the padding to zeros, and make sure it and the auth tag are part of the skb */
+	memset(skb_tail_pointer(trailer), 0, padding_len);
+
+	/* Expand head section to have room for our header and the network stack's headers. */
+	if (unlikely(skb_cow_head(skb, DATA_PACKET_HEAD_ROOM) < 0))
+		return false;
+
+	/* We have to remember to add the checksum to the innerpacket, in case the receiver forwards it. */
+	if (likely(!skb_checksum_setup(skb, true)))
+		skb_checksum_help(skb);
+
+	/* Only after checksumming can we safely add on the padding at the end and the header. */
+	skb_set_inner_network_header(skb, 0);
+	header = (struct message_data *)skb_push(skb, sizeof(struct message_data));
+	header->header.type = cpu_to_le32(MESSAGE_DATA);
+	header->key_idx = keypair->remote_index;
+	header->counter = cpu_to_le64(PACKET_CB(skb)->nonce);
+	pskb_put(skb, trailer, trailer_len);
+
+	/* Now we can encrypt the scattergather segments */
+	sg_init_table(sg, num_frags);
+	if (skb_to_sgvec(skb, sg, sizeof(struct message_data), noise_encrypted_len(plaintext_len)) <= 0)
+		return false;
+	return chacha20poly1305_encrypt_sg(sg, sg, plaintext_len, NULL, 0, PACKET_CB(skb)->nonce, keypair->sending.key, have_simd);
+}
+
+void packet_send_keepalive(struct wireguard_peer *peer)
+{
+	struct sk_buff *skb;
+
+	if (skb_queue_empty(&peer->staged_packet_queue)) {
+		skb = alloc_skb(DATA_PACKET_HEAD_ROOM + MESSAGE_MINIMUM_LENGTH, GFP_ATOMIC);
+		if (unlikely(!skb))
+			return;
+		skb_reserve(skb, DATA_PACKET_HEAD_ROOM);
+		skb->dev = peer->device->dev;
+		skb_queue_tail(&peer->staged_packet_queue, skb);
+		net_dbg_ratelimited("%s: Sending keepalive packet to peer %llu (%pISpfsc)\n", peer->device->dev->name, peer->internal_id, &peer->endpoint.addr);
+	}
+
+	packet_send_staged_packets(peer);
+}
+
+#define skb_walk_null_queue_safe(first, skb, next) for (skb = first, next = skb->next; skb; skb = next, next = skb ? skb->next : NULL)
+static inline void skb_free_null_queue(struct sk_buff *first)
+{
+	struct sk_buff *skb, *next;
+
+	skb_walk_null_queue_safe(first, skb, next)
+		dev_kfree_skb(skb);
+}
+
+static void packet_create_data_done(struct sk_buff *first, struct wireguard_peer *peer)
+{
+	struct sk_buff *skb, *next;
+	bool is_keepalive, data_sent = false;
+
+	timers_any_authenticated_packet_traversal(peer);
+	skb_walk_null_queue_safe(first, skb, next) {
+		is_keepalive = skb->len == message_data_len(0);
+		if (likely(!socket_send_skb_to_peer(peer, skb, PACKET_CB(skb)->ds) && !is_keepalive))
+			data_sent = true;
+	}
+
+	if (likely(data_sent))
+		timers_data_sent(peer);
+
+	keep_key_fresh(peer);
+}
+
+void packet_tx_worker(struct work_struct *work)
+{
+	struct crypt_queue *queue = container_of(work, struct crypt_queue, work);
+	struct wireguard_peer *peer;
+	struct noise_keypair *keypair;
+	struct sk_buff *first;
+	enum packet_state state;
+
+	spin_lock_bh(&queue->ring.consumer_lock);
+	while ((first = __ptr_ring_peek(&queue->ring)) != NULL && (state = atomic_read(&PACKET_CB(first)->state)) != PACKET_STATE_UNCRYPTED) {
+		__ptr_ring_discard_one(&queue->ring);
+		peer = PACKET_PEER(first);
+		keypair = PACKET_CB(first)->keypair;
+
+		if (likely(state == PACKET_STATE_CRYPTED))
+			packet_create_data_done(first, peer);
+		else
+			skb_free_null_queue(first);
+
+		noise_keypair_put(keypair);
+		peer_put(peer);
+	}
+	spin_unlock_bh(&queue->ring.consumer_lock);
+}
+
+void packet_encrypt_worker(struct work_struct *work)
+{
+	struct crypt_queue *queue = container_of(work, struct multicore_worker, work)->ptr;
+	struct sk_buff *first, *skb, *next;
+	bool have_simd = chacha20poly1305_init_simd();
+
+	while ((first = ptr_ring_consume_bh(&queue->ring)) != NULL) {
+		enum packet_state state = PACKET_STATE_CRYPTED;
+
+		skb_walk_null_queue_safe(first, skb, next) {
+			if (likely(skb_encrypt(skb, PACKET_CB(first)->keypair, have_simd)))
+				skb_reset(skb);
+			else {
+				state = PACKET_STATE_DEAD;
+				break;
+			}
+		}
+		queue_enqueue_per_peer(&PACKET_PEER(first)->tx_queue, first, state);
+	}
+	chacha20poly1305_deinit_simd(have_simd);
+}
+
+static void packet_create_data(struct sk_buff *first)
+{
+	struct wireguard_peer *peer = PACKET_PEER(first);
+	struct wireguard_device *wg = peer->device;
+	int ret;
+
+	ret = queue_enqueue_per_device_and_peer(&wg->encrypt_queue, &peer->tx_queue, first, wg->packet_crypt_wq, &wg->encrypt_queue.last_cpu);
+	if (likely(!ret))
+		return; /* Successful. No need to fall through to drop references below. */
+
+	if (ret == -EPIPE)
+		queue_enqueue_per_peer(&peer->tx_queue, first, PACKET_STATE_DEAD);
+	else {
+		peer_put(peer);
+		noise_keypair_put(PACKET_CB(first)->keypair);
+		skb_free_null_queue(first);
+	}
+}
+
+void packet_send_staged_packets(struct wireguard_peer *peer)
+{
+	struct noise_keypair *keypair;
+	struct noise_symmetric_key *key;
+	struct sk_buff_head packets;
+	struct sk_buff *skb;
+
+	/* Steal the current queue into our local one. */
+	__skb_queue_head_init(&packets);
+	spin_lock_bh(&peer->staged_packet_queue.lock);
+	skb_queue_splice_init(&peer->staged_packet_queue, &packets);
+	spin_unlock_bh(&peer->staged_packet_queue.lock);
+	if (unlikely(skb_queue_empty(&packets)))
+		return;
+
+	/* First we make sure we have a valid reference to a valid key. */
+	rcu_read_lock_bh();
+	keypair = noise_keypair_get(rcu_dereference_bh(peer->keypairs.current_keypair));
+	rcu_read_unlock_bh();
+	if (unlikely(!keypair))
+		goto out_nokey;
+	key = &keypair->sending;
+	if (unlikely(!key || !key->is_valid))
+		goto out_nokey;
+	if (unlikely(time_is_before_eq_jiffies64(key->birthdate + REJECT_AFTER_TIME)))
+		goto out_invalid;
+
+	/* After we know we have a somewhat valid key, we now try to assign nonces to
+	 * all of the packets in the queue. If we can't assign nonces for all of them,
+	 * we just consider it a failure and wait for the next handshake.
+	 */
+	skb_queue_walk(&packets, skb) {
+		PACKET_CB(skb)->ds = ip_tunnel_ecn_encap(0 /* No outer TOS: no leak. TODO: should we use flowi->tos as outer? */, ip_hdr(skb), skb);
+		PACKET_CB(skb)->nonce = atomic64_inc_return(&key->counter.counter) - 1;
+		if (unlikely(PACKET_CB(skb)->nonce >= REJECT_AFTER_MESSAGES))
+			goto out_invalid;
+	}
+
+	packets.prev->next = NULL;
+	peer_rcu_get(keypair->entry.peer);
+	PACKET_CB(packets.next)->keypair = keypair;
+	packet_create_data(packets.next);
+	return;
+
+out_invalid:
+	key->is_valid = false;
+out_nokey:
+	noise_keypair_put(keypair);
+
+	/* We orphan the packets if we're waiting on a handshake, so that they
+	 * don't block a socket's pool.
+	 */
+	skb_queue_walk(&packets, skb)
+		skb_orphan(skb);
+	/* Then we put them back on the top of the queue. We're not too concerned about
+	 * accidently getting things a little out of order if packets are being added
+	 * really fast, because this queue is for before packets can even be sent and
+	 * it's small anyway.
+	 */
+	spin_lock_bh(&peer->staged_packet_queue.lock);
+	skb_queue_splice(&packets, &peer->staged_packet_queue);
+	spin_unlock_bh(&peer->staged_packet_queue.lock);
+
+	/* If we're exiting because there's something wrong with the key, it means
+	 * we should initiate a new handshake.
+	 */
+	packet_send_queued_handshake_initiation(peer, false);
+}
diff --git b/net/wireguard/socket.c b/net/wireguard/socket.c
new file mode 100644
index 0000000..5170dc8
--- /dev/null
+++ b/net/wireguard/socket.c
@@ -0,0 +1,389 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#include "device.h"
+#include "peer.h"
+#include "socket.h"
+#include "queueing.h"
+#include "messages.h"
+
+#include <linux/ctype.h>
+#include <linux/net.h>
+#include <linux/if_vlan.h>
+#include <linux/if_ether.h>
+#include <linux/inetdevice.h>
+#include <net/udp_tunnel.h>
+#include <net/ipv6.h>
+
+static inline int send4(struct wireguard_device *wg, struct sk_buff *skb, struct endpoint *endpoint, u8 ds, struct dst_cache *cache)
+{
+	struct flowi4 fl = {
+		.saddr = endpoint->src4.s_addr,
+		.daddr = endpoint->addr4.sin_addr.s_addr,
+		.fl4_dport = endpoint->addr4.sin_port,
+		.flowi4_mark = wg->fwmark,
+		.flowi4_proto = IPPROTO_UDP
+	};
+	struct rtable *rt = NULL;
+	struct sock *sock;
+	int ret = 0;
+
+	skb->next = skb->prev = NULL;
+	skb->dev = wg->dev;
+	skb->mark = wg->fwmark;
+
+	rcu_read_lock_bh();
+	sock = rcu_dereference_bh(wg->sock4);
+	fl.fl4_sport = inet_sk(sock)->inet_sport;
+
+	if (unlikely(!sock)) {
+		ret = -ENONET;
+		goto err;
+	}
+
+	if (cache)
+		rt = dst_cache_get_ip4(cache, &fl.saddr);
+
+	if (!rt) {
+		security_sk_classify_flow(sock, flowi4_to_flowi(&fl));
+		if (unlikely(!inet_confirm_addr(sock_net(sock), NULL, 0, fl.saddr, RT_SCOPE_HOST))) {
+			endpoint->src4.s_addr = *(__force __be32 *)&endpoint->src_if4 = fl.saddr = 0;
+			if (cache)
+				dst_cache_reset(cache);
+		}
+		rt = ip_route_output_flow(sock_net(sock), &fl, sock);
+		if (unlikely(endpoint->src_if4 && ((IS_ERR(rt) && PTR_ERR(rt) == -EINVAL) || (!IS_ERR(rt) && rt->dst.dev->ifindex != endpoint->src_if4)))) {
+			endpoint->src4.s_addr = *(__force __be32 *)&endpoint->src_if4 = fl.saddr = 0;
+			if (cache)
+				dst_cache_reset(cache);
+			if (!IS_ERR(rt))
+				ip_rt_put(rt);
+			rt = ip_route_output_flow(sock_net(sock), &fl, sock);
+		}
+		if (unlikely(IS_ERR(rt))) {
+			ret = PTR_ERR(rt);
+			net_dbg_ratelimited("%s: No route to %pISpfsc, error %d\n", wg->dev->name, &endpoint->addr, ret);
+			goto err;
+		} else if (unlikely(rt->dst.dev == skb->dev)) {
+			ip_rt_put(rt);
+			ret = -ELOOP;
+			net_dbg_ratelimited("%s: Avoiding routing loop to %pISpfsc\n", wg->dev->name, &endpoint->addr);
+			goto err;
+		}
+		if (cache)
+			dst_cache_set_ip4(cache, &rt->dst, fl.saddr);
+	}
+	udp_tunnel_xmit_skb(rt, sock, skb, fl.saddr, fl.daddr, ds, ip4_dst_hoplimit(&rt->dst), 0, fl.fl4_sport, fl.fl4_dport, false, false);
+	goto out;
+
+err:
+	kfree_skb(skb);
+out:
+	rcu_read_unlock_bh();
+	return ret;
+}
+
+static inline int send6(struct wireguard_device *wg, struct sk_buff *skb, struct endpoint *endpoint, u8 ds, struct dst_cache *cache)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+	struct flowi6 fl = {
+		.saddr = endpoint->src6,
+		.daddr = endpoint->addr6.sin6_addr,
+		.fl6_dport = endpoint->addr6.sin6_port,
+		.flowi6_mark = wg->fwmark,
+		.flowi6_oif = endpoint->addr6.sin6_scope_id,
+		.flowi6_proto = IPPROTO_UDP
+		/* TODO: addr->sin6_flowinfo */
+	};
+	struct dst_entry *dst = NULL;
+	struct sock *sock;
+	int ret = 0;
+
+	skb->next = skb->prev = NULL;
+	skb->dev = wg->dev;
+	skb->mark = wg->fwmark;
+
+	rcu_read_lock_bh();
+	sock = rcu_dereference_bh(wg->sock6);
+	fl.fl6_sport = inet_sk(sock)->inet_sport;
+
+	if (unlikely(!sock)) {
+		ret = -ENONET;
+		goto err;
+	}
+
+	if (cache)
+		dst = dst_cache_get_ip6(cache, &fl.saddr);
+
+	if (!dst) {
+		security_sk_classify_flow(sock, flowi6_to_flowi(&fl));
+		if (unlikely(!ipv6_addr_any(&fl.saddr) && !ipv6_chk_addr(sock_net(sock), &fl.saddr, NULL, 0))) {
+			endpoint->src6 = fl.saddr = in6addr_any;
+			if (cache)
+				dst_cache_reset(cache);
+		}
+		ret = ipv6_stub->ipv6_dst_lookup(sock_net(sock), sock, &dst, &fl);
+		if (unlikely(ret)) {
+			net_dbg_ratelimited("%s: No route to %pISpfsc, error %d\n", wg->dev->name, &endpoint->addr, ret);
+			goto err;
+		} else if (unlikely(dst->dev == skb->dev)) {
+			dst_release(dst);
+			ret = -ELOOP;
+			net_dbg_ratelimited("%s: Avoiding routing loop to %pISpfsc\n", wg->dev->name, &endpoint->addr);
+			goto err;
+		}
+		if (cache)
+			dst_cache_set_ip6(cache, dst, &fl.saddr);
+	}
+
+	udp_tunnel6_xmit_skb(dst, sock, skb, skb->dev, &fl.saddr, &fl.daddr, ds, ip6_dst_hoplimit(dst), 0, fl.fl6_sport, fl.fl6_dport, false);
+	goto out;
+
+err:
+	kfree_skb(skb);
+out:
+	rcu_read_unlock_bh();
+	return ret;
+#else
+	return -EAFNOSUPPORT;
+#endif
+}
+
+int socket_send_skb_to_peer(struct wireguard_peer *peer, struct sk_buff *skb, u8 ds)
+{
+	size_t skb_len = skb->len;
+	int ret = -EAFNOSUPPORT;
+
+	read_lock_bh(&peer->endpoint_lock);
+	if (peer->endpoint.addr.sa_family == AF_INET)
+		ret = send4(peer->device, skb, &peer->endpoint, ds, &peer->endpoint_cache);
+	else if (peer->endpoint.addr.sa_family == AF_INET6)
+		ret = send6(peer->device, skb, &peer->endpoint, ds, &peer->endpoint_cache);
+	if (likely(!ret))
+		peer->tx_bytes += skb_len;
+	read_unlock_bh(&peer->endpoint_lock);
+
+	return ret;
+}
+
+int socket_send_buffer_to_peer(struct wireguard_peer *peer, void *buffer, size_t len, u8 ds)
+{
+	struct sk_buff *skb = alloc_skb(len + SKB_HEADER_LEN, GFP_ATOMIC);
+
+	if (unlikely(!skb))
+		return -ENOMEM;
+
+	skb_reserve(skb, SKB_HEADER_LEN);
+	skb_set_inner_network_header(skb, 0);
+	memcpy(skb_put(skb, len), buffer, len);
+	return socket_send_skb_to_peer(peer, skb, ds);
+}
+
+int socket_send_buffer_as_reply_to_skb(struct wireguard_device *wg, struct sk_buff *in_skb, void *out_buffer, size_t len)
+{
+	int ret = 0;
+	struct sk_buff *skb;
+	struct endpoint endpoint;
+
+	if (unlikely(!in_skb))
+		return -EINVAL;
+	ret = socket_endpoint_from_skb(&endpoint, in_skb);
+	if (unlikely(ret < 0))
+		return ret;
+
+	skb = alloc_skb(len + SKB_HEADER_LEN, GFP_ATOMIC);
+	if (unlikely(!skb))
+		return -ENOMEM;
+	skb_reserve(skb, SKB_HEADER_LEN);
+	skb_set_inner_network_header(skb, 0);
+	memcpy(skb_put(skb, len), out_buffer, len);
+
+	if (endpoint.addr.sa_family == AF_INET)
+		ret = send4(wg, skb, &endpoint, 0, NULL);
+	else if (endpoint.addr.sa_family == AF_INET6)
+		ret = send6(wg, skb, &endpoint, 0, NULL);
+	/* No other possibilities if the endpoint is valid, which it is, as we checked above. */
+
+	return ret;
+}
+
+int socket_endpoint_from_skb(struct endpoint *endpoint, const struct sk_buff *skb)
+{
+	memset(endpoint, 0, sizeof(struct endpoint));
+	if (skb->protocol == htons(ETH_P_IP)) {
+		endpoint->addr4.sin_family = AF_INET;
+		endpoint->addr4.sin_port = udp_hdr(skb)->source;
+		endpoint->addr4.sin_addr.s_addr = ip_hdr(skb)->saddr;
+		endpoint->src4.s_addr = ip_hdr(skb)->daddr;
+		endpoint->src_if4 = skb->skb_iif;
+	} else if (skb->protocol == htons(ETH_P_IPV6)) {
+		endpoint->addr6.sin6_family = AF_INET6;
+		endpoint->addr6.sin6_port = udp_hdr(skb)->source;
+		endpoint->addr6.sin6_addr = ipv6_hdr(skb)->saddr;
+		endpoint->addr6.sin6_scope_id = ipv6_iface_scope_id(&ipv6_hdr(skb)->saddr, skb->skb_iif);
+		endpoint->src6 = ipv6_hdr(skb)->daddr;
+	} else
+		return -EINVAL;
+	return 0;
+}
+
+static inline bool endpoint_eq(const struct endpoint *a, const struct endpoint *b)
+{
+	return (a->addr.sa_family == AF_INET && b->addr.sa_family == AF_INET &&
+		a->addr4.sin_port == b->addr4.sin_port && a->addr4.sin_addr.s_addr == b->addr4.sin_addr.s_addr &&
+		a->src4.s_addr == b->src4.s_addr && a->src_if4 == b->src_if4) ||
+	       (a->addr.sa_family == AF_INET6 && b->addr.sa_family == AF_INET6 &&
+		a->addr6.sin6_port == b->addr6.sin6_port && ipv6_addr_equal(&a->addr6.sin6_addr, &b->addr6.sin6_addr) &&
+		a->addr6.sin6_scope_id == b->addr6.sin6_scope_id && ipv6_addr_equal(&a->src6, &b->src6)) ||
+	       unlikely(!a->addr.sa_family && !b->addr.sa_family);
+}
+
+void socket_set_peer_endpoint(struct wireguard_peer *peer, const struct endpoint *endpoint)
+{
+	/* First we check unlocked, in order to optimize, since it's pretty rare
+	 * that an endpoint will change. If we happen to be mid-write, and two
+	 * CPUs wind up writing the same thing or something slightly different,
+	 * it doesn't really matter much either.
+	 */
+	if (endpoint_eq(endpoint, &peer->endpoint))
+		return;
+	write_lock_bh(&peer->endpoint_lock);
+	if (endpoint->addr.sa_family == AF_INET) {
+		peer->endpoint.addr4 = endpoint->addr4;
+		peer->endpoint.src4 = endpoint->src4;
+		peer->endpoint.src_if4 = endpoint->src_if4;
+	} else if (endpoint->addr.sa_family == AF_INET6) {
+		peer->endpoint.addr6 = endpoint->addr6;
+		peer->endpoint.src6 = endpoint->src6;
+	} else
+		goto out;
+	dst_cache_reset(&peer->endpoint_cache);
+out:
+	write_unlock_bh(&peer->endpoint_lock);
+}
+
+void socket_set_peer_endpoint_from_skb(struct wireguard_peer *peer, const struct sk_buff *skb)
+{
+	struct endpoint endpoint;
+
+	if (!socket_endpoint_from_skb(&endpoint, skb))
+		socket_set_peer_endpoint(peer, &endpoint);
+}
+
+void socket_clear_peer_endpoint_src(struct wireguard_peer *peer)
+{
+	write_lock_bh(&peer->endpoint_lock);
+	memset(&peer->endpoint.src6, 0, sizeof(peer->endpoint.src6));
+	dst_cache_reset(&peer->endpoint_cache);
+	write_unlock_bh(&peer->endpoint_lock);
+}
+
+static int receive(struct sock *sk, struct sk_buff *skb)
+{
+	struct wireguard_device *wg;
+
+	if (unlikely(!sk))
+		goto err;
+	wg = sk->sk_user_data;
+	if (unlikely(!wg))
+		goto err;
+	packet_receive(wg, skb);
+	return 0;
+
+err:
+	kfree_skb(skb);
+	return 0;
+}
+
+static inline void sock_free(struct sock *sock)
+{
+	if (unlikely(!sock))
+		return;
+	sk_clear_memalloc(sock);
+	udp_tunnel_sock_release(sock->sk_socket);
+}
+
+static inline void set_sock_opts(struct socket *sock)
+{
+	sock->sk->sk_allocation = GFP_ATOMIC;
+	sock->sk->sk_sndbuf = INT_MAX;
+	sk_set_memalloc(sock->sk);
+}
+
+int socket_init(struct wireguard_device *wg, u16 port)
+{
+	int ret;
+	struct udp_tunnel_sock_cfg cfg = {
+		.sk_user_data = wg,
+		.encap_type = 1,
+		.encap_rcv = receive
+	};
+	struct socket *new4 = NULL, *new6 = NULL;
+	struct udp_port_cfg port4 = {
+		.family = AF_INET,
+		.local_ip.s_addr = htonl(INADDR_ANY),
+		.local_udp_port = htons(port),
+		.use_udp_checksums = true
+	};
+#if IS_ENABLED(CONFIG_IPV6)
+	int retries = 0;
+	struct udp_port_cfg port6 = {
+		.family = AF_INET6,
+		.local_ip6 = IN6ADDR_ANY_INIT,
+		.use_udp6_tx_checksums = true,
+		.use_udp6_rx_checksums = true,
+		.ipv6_v6only = true
+	};
+#endif
+
+#if IS_ENABLED(CONFIG_IPV6)
+retry:
+#endif
+
+	ret = udp_sock_create(wg->creating_net, &port4, &new4);
+	if (ret < 0) {
+		pr_err("%s: Could not create IPv4 socket\n", wg->dev->name);
+		return ret;
+	}
+	set_sock_opts(new4);
+	setup_udp_tunnel_sock(wg->creating_net, new4, &cfg);
+
+#if IS_ENABLED(CONFIG_IPV6)
+	if (ipv6_mod_enabled()) {
+		port6.local_udp_port = inet_sk(new4->sk)->inet_sport;
+		ret = udp_sock_create(wg->creating_net, &port6, &new6);
+		if (ret < 0) {
+			udp_tunnel_sock_release(new4);
+			if (ret == -EADDRINUSE && !port && retries++ < 100)
+				goto retry;
+			pr_err("%s: Could not create IPv6 socket\n", wg->dev->name);
+			return ret;
+		}
+		set_sock_opts(new6);
+		setup_udp_tunnel_sock(wg->creating_net, new6, &cfg);
+	}
+#endif
+
+	socket_reinit(wg, new4 ? new4->sk : NULL, new6 ? new6->sk : NULL);
+	return 0;
+}
+
+void socket_reinit(struct wireguard_device *wg, struct sock *new4, struct sock *new6)
+{
+	struct sock *old4, *old6;
+
+	mutex_lock(&wg->socket_update_lock);
+	old4 = rcu_dereference_protected(wg->sock4, lockdep_is_held(&wg->socket_update_lock));
+	old6 = rcu_dereference_protected(wg->sock6, lockdep_is_held(&wg->socket_update_lock));
+	rcu_assign_pointer(wg->sock4, new4);
+	rcu_assign_pointer(wg->sock6, new6);
+	if (new4)
+		wg->incoming_port = ntohs(inet_sk(new4)->inet_sport);
+	mutex_unlock(&wg->socket_update_lock);
+	synchronize_rcu_bh();
+	synchronize_net();
+	sock_free(old4);
+	sock_free(old6);
+}
diff --git b/net/wireguard/socket.h b/net/wireguard/socket.h
new file mode 100644
index 0000000..a0d3204
--- /dev/null
+++ b/net/wireguard/socket.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#ifndef _WG_SOCKET_H
+#define _WG_SOCKET_H
+
+#include <linux/netdevice.h>
+#include <linux/udp.h>
+#include <linux/if_vlan.h>
+#include <linux/if_ether.h>
+
+int socket_init(struct wireguard_device *wg, u16 port);
+void socket_reinit(struct wireguard_device *wg, struct sock *new4, struct sock *new6);
+int socket_send_buffer_to_peer(struct wireguard_peer *peer, void *data, size_t len, u8 ds);
+int socket_send_skb_to_peer(struct wireguard_peer *peer, struct sk_buff *skb, u8 ds);
+int socket_send_buffer_as_reply_to_skb(struct wireguard_device *wg, struct sk_buff *in_skb, void *out_buffer, size_t len);
+
+int socket_endpoint_from_skb(struct endpoint *endpoint, const struct sk_buff *skb);
+void socket_set_peer_endpoint(struct wireguard_peer *peer, const struct endpoint *endpoint);
+void socket_set_peer_endpoint_from_skb(struct wireguard_peer *peer, const struct sk_buff *skb);
+void socket_clear_peer_endpoint_src(struct wireguard_peer *peer);
+
+#if defined(CONFIG_DYNAMIC_DEBUG) || defined(DEBUG)
+#define net_dbg_skb_ratelimited(fmt, dev, skb, ...) do { \
+	struct endpoint __endpoint; \
+	socket_endpoint_from_skb(&__endpoint, skb); \
+	net_dbg_ratelimited(fmt, dev, &__endpoint.addr, ##__VA_ARGS__); \
+} while (0)
+#else
+#define net_dbg_skb_ratelimited(fmt, skb, ...)
+#endif
+
+#endif /* _WG_SOCKET_H */
diff --git b/net/wireguard/timers.c b/net/wireguard/timers.c
new file mode 100644
index 0000000..833d77b
--- /dev/null
+++ b/net/wireguard/timers.c
@@ -0,0 +1,199 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#include "timers.h"
+#include "device.h"
+#include "peer.h"
+#include "queueing.h"
+#include "socket.h"
+
+/*
+ * Timer for retransmitting the handshake if we don't hear back after `REKEY_TIMEOUT + jitter` ms
+ * Timer for sending empty packet if we have received a packet but after have not sent one for `KEEPALIVE_TIMEOUT` ms
+ * Timer for initiating new handshake if we have sent a packet but after have not received one (even empty) for `(KEEPALIVE_TIMEOUT + REKEY_TIMEOUT)` ms
+ * Timer for zeroing out all ephemeral keys after `(REJECT_AFTER_TIME * 3)` ms if no new keys have been received
+ * Timer for, if enabled, sending an empty authenticated packet every user-specified seconds
+ */
+
+/* This rounds the time down to the closest power of two of the closest quarter second. */
+static inline unsigned long slack_time(unsigned long time)
+{
+	return time & ~(roundup_pow_of_two(HZ / 4) - 1);
+}
+
+#define peer_get_from_timer(timer_name) \
+	struct wireguard_peer *peer = peer_rcu_get(from_timer(peer, timer, timer_name)); \
+	if (unlikely(!peer)) \
+		return;
+
+static inline bool timers_active(struct wireguard_peer *peer)
+{
+	return netif_running(peer->device->dev) && !list_empty(&peer->peer_list);
+}
+
+static void expired_retransmit_handshake(struct timer_list *timer)
+{
+	peer_get_from_timer(timer_retransmit_handshake);
+
+	if (peer->timer_handshake_attempts > MAX_TIMER_HANDSHAKES) {
+		pr_debug("%s: Handshake for peer %llu (%pISpfsc) did not complete after %d attempts, giving up\n", peer->device->dev->name, peer->internal_id, &peer->endpoint.addr, MAX_TIMER_HANDSHAKES + 2);
+
+		if (likely(timers_active(peer)))
+			del_timer(&peer->timer_send_keepalive);
+		/* We drop all packets without a keypair and don't try again,
+		 * if we try unsuccessfully for too long to make a handshake.
+		 */
+		skb_queue_purge(&peer->staged_packet_queue);
+
+		/* We set a timer for destroying any residue that might be left
+		 * of a partial exchange.
+		 */
+		if (likely(timers_active(peer)) && !timer_pending(&peer->timer_zero_key_material))
+			mod_timer(&peer->timer_zero_key_material, jiffies + (REJECT_AFTER_TIME * 3));
+	} else {
+		++peer->timer_handshake_attempts;
+		pr_debug("%s: Handshake for peer %llu (%pISpfsc) did not complete after %d seconds, retrying (try %d)\n", peer->device->dev->name, peer->internal_id, &peer->endpoint.addr, REKEY_TIMEOUT / HZ, peer->timer_handshake_attempts + 1);
+
+		/* We clear the endpoint address src address, in case this is the cause of trouble. */
+		socket_clear_peer_endpoint_src(peer);
+
+		packet_send_queued_handshake_initiation(peer, true);
+	}
+	peer_put(peer);
+}
+
+static void expired_send_keepalive(struct timer_list *timer)
+{
+	peer_get_from_timer(timer_send_keepalive);
+
+	packet_send_keepalive(peer);
+	if (peer->timer_need_another_keepalive) {
+		peer->timer_need_another_keepalive = false;
+		if (likely(timers_active(peer)))
+			mod_timer(&peer->timer_send_keepalive, jiffies + KEEPALIVE_TIMEOUT);
+	}
+	peer_put(peer);
+}
+
+static void expired_new_handshake(struct timer_list *timer)
+{
+	peer_get_from_timer(timer_new_handshake);
+
+	pr_debug("%s: Retrying handshake with peer %llu (%pISpfsc) because we stopped hearing back after %d seconds\n", peer->device->dev->name, peer->internal_id, &peer->endpoint.addr, (KEEPALIVE_TIMEOUT + REKEY_TIMEOUT) / HZ);
+	/* We clear the endpoint address src address, in case this is the cause of trouble. */
+	socket_clear_peer_endpoint_src(peer);
+	packet_send_queued_handshake_initiation(peer, false);
+	peer_put(peer);
+}
+
+static void expired_zero_key_material(struct timer_list *timer)
+{
+	peer_get_from_timer(timer_zero_key_material);
+
+	if (!queue_work(peer->device->handshake_send_wq, &peer->clear_peer_work)) /* Takes our reference. */
+		peer_put(peer); /* If the work was already on the queue, we want to drop the extra reference */
+}
+static void queued_expired_zero_key_material(struct work_struct *work)
+{
+	struct wireguard_peer *peer = container_of(work, struct wireguard_peer, clear_peer_work);
+
+	pr_debug("%s: Zeroing out all keys for peer %llu (%pISpfsc), since we haven't received a new one in %d seconds\n", peer->device->dev->name, peer->internal_id, &peer->endpoint.addr, (REJECT_AFTER_TIME * 3) / HZ);
+	noise_handshake_clear(&peer->handshake);
+	noise_keypairs_clear(&peer->keypairs);
+	peer_put(peer);
+}
+
+static void expired_send_persistent_keepalive(struct timer_list *timer)
+{
+	peer_get_from_timer(timer_persistent_keepalive);
+
+	if (likely(peer->persistent_keepalive_interval)) {
+		if (likely(timers_active(peer)))
+			del_timer(&peer->timer_send_keepalive);
+		packet_send_keepalive(peer);
+	}
+	peer_put(peer);
+}
+
+/* Should be called after an authenticated data packet is sent. */
+void timers_data_sent(struct wireguard_peer *peer)
+{
+	if (likely(timers_active(peer)))
+		del_timer(&peer->timer_send_keepalive);
+
+	if (likely(timers_active(peer)) && !timer_pending(&peer->timer_new_handshake))
+		mod_timer(&peer->timer_new_handshake, jiffies + KEEPALIVE_TIMEOUT + REKEY_TIMEOUT);
+}
+
+/* Should be called after an authenticated data packet is received. */
+void timers_data_received(struct wireguard_peer *peer)
+{
+	if (likely(timers_active(peer))) {
+		if (!timer_pending(&peer->timer_send_keepalive))
+			mod_timer(&peer->timer_send_keepalive, jiffies + KEEPALIVE_TIMEOUT);
+		else
+			peer->timer_need_another_keepalive = true;
+	}
+}
+
+/* Should be called after any type of authenticated packet is received -- keepalive or data. */
+void timers_any_authenticated_packet_received(struct wireguard_peer *peer)
+{
+	if (likely(timers_active(peer)))
+		del_timer(&peer->timer_new_handshake);
+}
+
+/* Should be called after a handshake initiation message is sent. */
+void timers_handshake_initiated(struct wireguard_peer *peer)
+{
+	if (likely(timers_active(peer))) {
+		del_timer(&peer->timer_send_keepalive);
+		mod_timer(&peer->timer_retransmit_handshake, slack_time(jiffies + REKEY_TIMEOUT + prandom_u32_max(REKEY_TIMEOUT_JITTER_MAX)));
+	}
+}
+
+/* Should be called after a handshake response message is received and processed or when getting key confirmation via the first data message. */
+void timers_handshake_complete(struct wireguard_peer *peer)
+{
+	if (likely(timers_active(peer)))
+		del_timer(&peer->timer_retransmit_handshake);
+	peer->timer_handshake_attempts = 0;
+	peer->sent_lastminute_handshake = false;
+	getnstimeofday(&peer->walltime_last_handshake);
+}
+
+/* Should be called after an ephemeral key is created, which is before sending a handshake response or after receiving a handshake response. */
+void timers_session_derived(struct wireguard_peer *peer)
+{
+	if (likely(timers_active(peer)))
+		mod_timer(&peer->timer_zero_key_material, jiffies + (REJECT_AFTER_TIME * 3));
+}
+
+/* Should be called before a packet with authentication -- data, keepalive, either handshake -- is sent, or after one is received. */
+void timers_any_authenticated_packet_traversal(struct wireguard_peer *peer)
+{
+	if (peer->persistent_keepalive_interval && likely(timers_active(peer)))
+		mod_timer(&peer->timer_persistent_keepalive, slack_time(jiffies + peer->persistent_keepalive_interval));
+}
+
+void timers_init(struct wireguard_peer *peer)
+{
+	timer_setup(&peer->timer_retransmit_handshake, expired_retransmit_handshake, 0);
+	timer_setup(&peer->timer_send_keepalive, expired_send_keepalive, 0);
+	timer_setup(&peer->timer_new_handshake, expired_new_handshake, 0);
+	timer_setup(&peer->timer_zero_key_material, expired_zero_key_material, 0);
+	timer_setup(&peer->timer_persistent_keepalive, expired_send_persistent_keepalive, 0);
+	INIT_WORK(&peer->clear_peer_work, queued_expired_zero_key_material);
+}
+
+void timers_stop(struct wireguard_peer *peer)
+{
+	del_timer_sync(&peer->timer_retransmit_handshake);
+	del_timer_sync(&peer->timer_send_keepalive);
+	del_timer_sync(&peer->timer_new_handshake);
+	del_timer_sync(&peer->timer_zero_key_material);
+	del_timer_sync(&peer->timer_persistent_keepalive);
+	flush_work(&peer->clear_peer_work);
+}
diff --git b/net/wireguard/timers.h b/net/wireguard/timers.h
new file mode 100644
index 0000000..65bac3f
--- /dev/null
+++ b/net/wireguard/timers.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#ifndef _WG_TIMERS_H
+#define _WG_TIMERS_H
+
+struct wireguard_peer;
+
+void timers_init(struct wireguard_peer *peer);
+void timers_stop(struct wireguard_peer *peer);
+void timers_data_sent(struct wireguard_peer *peer);
+void timers_data_received(struct wireguard_peer *peer);
+void timers_any_authenticated_packet_received(struct wireguard_peer *peer);
+void timers_handshake_initiated(struct wireguard_peer *peer);
+void timers_handshake_complete(struct wireguard_peer *peer);
+void timers_session_derived(struct wireguard_peer *peer);
+void timers_any_authenticated_packet_traversal(struct wireguard_peer *peer);
+
+#endif /* _WG_TIMERS_H */
diff --git b/net/wireguard/uapi/wireguard.h b/net/wireguard/uapi/wireguard.h
new file mode 100644
index 0000000..2c9fcb5
--- /dev/null
+++ b/net/wireguard/uapi/wireguard.h
@@ -0,0 +1,182 @@
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR MIT)
+ *
+ * Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ *
+ * Documentation
+ * =============
+ *
+ * The below enums and macros are for interfacing with WireGuard, using generic
+ * netlink, with family WG_GENL_NAME and version WG_GENL_VERSION. It defines two
+ * methods: get and set. Note that while they share many common attributes, these
+ * two functions actually accept a slightly different set of inputs and outputs.
+ *
+ * WG_CMD_GET_DEVICE
+ * -----------------
+ *
+ * May only be called via NLM_F_REQUEST | NLM_F_DUMP. The command should contain
+ * one but not both of:
+ *
+ *	WGDEVICE_A_IFINDEX: NLA_U32
+ *	WGDEVICE_A_IFNAME: NLA_NUL_STRING, maxlen IFNAMESIZ - 1
+ *
+ * The kernel will then return several messages (NLM_F_MULTI) containing the following
+ * tree of nested items:
+ *
+ *	WGDEVICE_A_IFINDEX: NLA_U32
+ *	WGDEVICE_A_IFNAME: NLA_NUL_STRING, maxlen IFNAMESIZ - 1
+ *	WGDEVICE_A_PRIVATE_KEY: len WG_KEY_LEN
+ *	WGDEVICE_A_PUBLIC_KEY: len WG_KEY_LEN
+ *	WGDEVICE_A_LISTEN_PORT: NLA_U16
+ *	WGDEVICE_A_FWMARK: NLA_U32
+ *	WGDEVICE_A_PEERS: NLA_NESTED
+ *		0: NLA_NESTED
+ *			WGPEER_A_PUBLIC_KEY: len WG_KEY_LEN
+ *			WGPEER_A_PRESHARED_KEY: len WG_KEY_LEN
+ *			WGPEER_A_ENDPOINT: struct sockaddr_in or struct sockaddr_in6
+ *			WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL: NLA_U16
+ *			WGPEER_A_LAST_HANDSHAKE_TIME: struct timespec
+ *			WGPEER_A_RX_BYTES: NLA_U64
+ *			WGPEER_A_TX_BYTES: NLA_U64
+ *			WGPEER_A_ALLOWEDIPS: NLA_NESTED
+ *				0: NLA_NESTED
+ *					WGALLOWEDIP_A_FAMILY: NLA_U16
+ *					WGALLOWEDIP_A_IPADDR: struct in_addr or struct in6_addr
+ *					WGALLOWEDIP_A_CIDR_MASK: NLA_U8
+ *				1: NLA_NESTED
+ *					...
+ *				2: NLA_NESTED
+ *					...
+ *				...
+ *		1: NLA_NESTED
+ *			...
+ *		...
+ *
+ * It is possible that all of the allowed IPs of a single peer will not
+ * fit within a single netlink message. In that case, the same peer will
+ * be written in the following message, except it will only contain
+ * WGPEER_A_PUBLIC_KEY and WGPEER_A_ALLOWEDIPS. This may occur several
+ * times in a row for the same peer. It is then up to the receiver to
+ * coalesce adjacent peers. Likewise, it is possible that all peers will
+ * not fit within a single message. So, subsequent peers will be sent
+ * in following messages, except those will only contain WGDEVICE_A_IFNAME
+ * and WGDEVICE_A_PEERS. It is then up to the receiver to coalesce these
+ * messages to form the complete list of peers.
+ *
+ * Since this is an NLA_F_DUMP command, the final message will always be
+ * NLMSG_DONE, even if an error occurs. However, this NLMSG_DONE message
+ * contains an integer error code. It is either zero or a negative error
+ * code corresponding to the errno.
+ *
+ * WG_CMD_SET_DEVICE
+ * -----------------
+ *
+ * May only be called via NLM_F_REQUEST. The command should contain the following
+ * tree of nested items, containing one but not both of WGDEVICE_A_IFINDEX
+ * and WGDEVICE_A_IFNAME:
+ *
+ *	WGDEVICE_A_IFINDEX: NLA_U32
+ *	WGDEVICE_A_IFNAME: NLA_NUL_STRING, maxlen IFNAMESIZ - 1
+ *	WGDEVICE_A_FLAGS: NLA_U32, 0 or WGDEVICE_F_REPLACE_PEERS if all current
+ *			  peers should be removed prior to adding the list below.
+ *	WGDEVICE_A_PRIVATE_KEY: len WG_KEY_LEN, all zeros to remove
+ *	WGDEVICE_A_LISTEN_PORT: NLA_U16, 0 to choose randomly
+ *	WGDEVICE_A_FWMARK: NLA_U32, 0 to disable
+ *	WGDEVICE_A_PEERS: NLA_NESTED
+ *		0: NLA_NESTED
+ *			WGPEER_A_PUBLIC_KEY: len WG_KEY_LEN
+ *			WGPEER_A_FLAGS: NLA_U32, 0 and/or WGPEER_F_REMOVE_ME if the specified peer
+ *					should be removed rather than added/updated and/or
+ *					WGPEER_F_REPLACE_ALLOWEDIPS if all current allowed IPs of
+ *					this peer should be removed prior to adding the list below.
+ *			WGPEER_A_PRESHARED_KEY: len WG_KEY_LEN, all zeros to remove
+ *			WGPEER_A_ENDPOINT: struct sockaddr_in or struct sockaddr_in6
+ *			WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL: NLA_U16, 0 to disable
+ *			WGPEER_A_ALLOWEDIPS: NLA_NESTED
+ *				0: NLA_NESTED
+ *					WGALLOWEDIP_A_FAMILY: NLA_U16
+ *					WGALLOWEDIP_A_IPADDR: struct in_addr or struct in6_addr
+ *					WGALLOWEDIP_A_CIDR_MASK: NLA_U8
+ *				1: NLA_NESTED
+ *					...
+ *				2: NLA_NESTED
+ *					...
+ *				...
+ *		1: NLA_NESTED
+ *			...
+ *		...
+ *
+ * It is possible that the amount of configuration data exceeds that of
+ * the maximum message length accepted by the kernel. In that case,
+ * several messages should be sent one after another, with each
+ * successive one filling in information not contained in the prior. Note
+ * that if WGDEVICE_F_REPLACE_PEERS is specified in the first message, it
+ * probably should not be specified in fragments that come after, so that
+ * the list of peers is only cleared the first time but appened after.
+ * Likewise for peers, if WGPEER_F_REPLACE_ALLOWEDIPS is specified in the
+ * first message of a peer, it likely should not be specified in subsequent
+ * fragments.
+ *
+ * If an error occurs, NLMSG_ERROR will reply containing an errno.
+ */
+
+#ifndef _WG_UAPI_WIREGUARD_H
+#define _WG_UAPI_WIREGUARD_H
+
+#define WG_GENL_NAME "wireguard"
+#define WG_GENL_VERSION 1
+
+#define WG_KEY_LEN 32
+
+enum wg_cmd {
+	WG_CMD_GET_DEVICE,
+	WG_CMD_SET_DEVICE,
+	__WG_CMD_MAX
+};
+#define WG_CMD_MAX (__WG_CMD_MAX - 1)
+
+enum wgdevice_flag {
+	WGDEVICE_F_REPLACE_PEERS = 1U << 0
+};
+enum wgdevice_attribute {
+	WGDEVICE_A_UNSPEC,
+	WGDEVICE_A_IFINDEX,
+	WGDEVICE_A_IFNAME,
+	WGDEVICE_A_PRIVATE_KEY,
+	WGDEVICE_A_PUBLIC_KEY,
+	WGDEVICE_A_FLAGS,
+	WGDEVICE_A_LISTEN_PORT,
+	WGDEVICE_A_FWMARK,
+	WGDEVICE_A_PEERS,
+	__WGDEVICE_A_LAST
+};
+#define WGDEVICE_A_MAX (__WGDEVICE_A_LAST - 1)
+
+enum wgpeer_flag {
+	WGPEER_F_REMOVE_ME = 1U << 0,
+	WGPEER_F_REPLACE_ALLOWEDIPS = 1U << 1
+};
+enum wgpeer_attribute {
+	WGPEER_A_UNSPEC,
+	WGPEER_A_PUBLIC_KEY,
+	WGPEER_A_PRESHARED_KEY,
+	WGPEER_A_FLAGS,
+	WGPEER_A_ENDPOINT,
+	WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL,
+	WGPEER_A_LAST_HANDSHAKE_TIME,
+	WGPEER_A_RX_BYTES,
+	WGPEER_A_TX_BYTES,
+	WGPEER_A_ALLOWEDIPS,
+	__WGPEER_A_LAST
+};
+#define WGPEER_A_MAX (__WGPEER_A_LAST - 1)
+
+enum wgallowedip_attribute {
+	WGALLOWEDIP_A_UNSPEC,
+	WGALLOWEDIP_A_FAMILY,
+	WGALLOWEDIP_A_IPADDR,
+	WGALLOWEDIP_A_CIDR_MASK,
+	__WGALLOWEDIP_A_LAST
+};
+#define WGALLOWEDIP_A_MAX (__WGALLOWEDIP_A_LAST - 1)
+
+#endif /* _WG_UAPI_WIREGUARD_H */
diff --git b/net/wireguard/version.h b/net/wireguard/version.h
new file mode 100644
index 0000000..8ed5447
--- /dev/null
+++ b/net/wireguard/version.h
@@ -0,0 +1 @@
+#define WIREGUARD_VERSION "0.0.20171127"
diff --git a/samples/seccomp/Makefile b/samples/seccomp/Makefile
index ae7ff6f..7659beb 100644
--- a/samples/seccomp/Makefile
+++ b/samples/seccomp/Makefile
@@ -20,6 +20,7 @@ bpf-direct-objs := bpf-direct.o
 # Try to match the kernel target.
 ifndef CROSS_COMPILE
 ifndef CONFIG_64BIT
+ifndef CONFIG_ARM
 
 # s390 has -m31 flag to build 31 bit binaries
 ifndef CONFIG_S390
@@ -46,3 +47,4 @@ ifndef CONFIG_MIPS
 always := $(hostprogs-y)
 endif
 endif
+endif
diff --git a/scripts/dtc/checks.c b/scripts/dtc/checks.c
index 386f956..38f548e 100644
--- a/scripts/dtc/checks.c
+++ b/scripts/dtc/checks.c
@@ -40,16 +40,11 @@ enum checkstatus {
 
 struct check;
 
-typedef void (*tree_check_fn)(struct check *c, struct node *dt);
-typedef void (*node_check_fn)(struct check *c, struct node *dt, struct node *node);
-typedef void (*prop_check_fn)(struct check *c, struct node *dt,
-			      struct node *node, struct property *prop);
+typedef void (*check_fn)(struct check *c, struct dt_info *dti, struct node *node);
 
 struct check {
 	const char *name;
-	tree_check_fn tree_fn;
-	node_check_fn node_fn;
-	prop_check_fn prop_fn;
+	check_fn fn;
 	void *data;
 	bool warn, error;
 	enum checkstatus status;
@@ -58,57 +53,35 @@ struct check {
 	struct check **prereq;
 };
 
-#define CHECK_ENTRY(nm, tfn, nfn, pfn, d, w, e, ...)	       \
-	static struct check *nm##_prereqs[] = { __VA_ARGS__ }; \
-	static struct check nm = { \
-		.name = #nm, \
-		.tree_fn = (tfn), \
-		.node_fn = (nfn), \
-		.prop_fn = (pfn), \
-		.data = (d), \
-		.warn = (w), \
-		.error = (e), \
+#define CHECK_ENTRY(_nm, _fn, _d, _w, _e, ...)	       \
+	static struct check *_nm##_prereqs[] = { __VA_ARGS__ }; \
+	static struct check _nm = { \
+		.name = #_nm, \
+		.fn = (_fn), \
+		.data = (_d), \
+		.warn = (_w), \
+		.error = (_e), \
 		.status = UNCHECKED, \
-		.num_prereqs = ARRAY_SIZE(nm##_prereqs), \
-		.prereq = nm##_prereqs, \
+		.num_prereqs = ARRAY_SIZE(_nm##_prereqs), \
+		.prereq = _nm##_prereqs, \
 	};
-#define WARNING(nm, tfn, nfn, pfn, d, ...) \
-	CHECK_ENTRY(nm, tfn, nfn, pfn, d, true, false, __VA_ARGS__)
-#define ERROR(nm, tfn, nfn, pfn, d, ...) \
-	CHECK_ENTRY(nm, tfn, nfn, pfn, d, false, true, __VA_ARGS__)
-#define CHECK(nm, tfn, nfn, pfn, d, ...) \
-	CHECK_ENTRY(nm, tfn, nfn, pfn, d, false, false, __VA_ARGS__)
-
-#define TREE_WARNING(nm, d, ...) \
-	WARNING(nm, check_##nm, NULL, NULL, d, __VA_ARGS__)
-#define TREE_ERROR(nm, d, ...) \
-	ERROR(nm, check_##nm, NULL, NULL, d, __VA_ARGS__)
-#define TREE_CHECK(nm, d, ...) \
-	CHECK(nm, check_##nm, NULL, NULL, d, __VA_ARGS__)
-#define NODE_WARNING(nm, d, ...) \
-	WARNING(nm, NULL, check_##nm, NULL, d,  __VA_ARGS__)
-#define NODE_ERROR(nm, d, ...) \
-	ERROR(nm, NULL, check_##nm, NULL, d, __VA_ARGS__)
-#define NODE_CHECK(nm, d, ...) \
-	CHECK(nm, NULL, check_##nm, NULL, d, __VA_ARGS__)
-#define PROP_WARNING(nm, d, ...) \
-	WARNING(nm, NULL, NULL, check_##nm, d, __VA_ARGS__)
-#define PROP_ERROR(nm, d, ...) \
-	ERROR(nm, NULL, NULL, check_##nm, d, __VA_ARGS__)
-#define PROP_CHECK(nm, d, ...) \
-	CHECK(nm, NULL, NULL, check_##nm, d, __VA_ARGS__)
-
-#ifdef __GNUC__
-static inline void check_msg(struct check *c, const char *fmt, ...) __attribute__((format (printf, 2, 3)));
-#endif
-static inline void check_msg(struct check *c, const char *fmt, ...)
+#define WARNING(_nm, _fn, _d, ...) \
+	CHECK_ENTRY(_nm, _fn, _d, true, false, __VA_ARGS__)
+#define ERROR(_nm, _fn, _d, ...) \
+	CHECK_ENTRY(_nm, _fn, _d, false, true, __VA_ARGS__)
+#define CHECK(_nm, _fn, _d, ...) \
+	CHECK_ENTRY(_nm, _fn, _d, false, false, __VA_ARGS__)
+
+static inline void  PRINTF(3, 4) check_msg(struct check *c, struct dt_info *dti,
+					   const char *fmt, ...)
 {
 	va_list ap;
 	va_start(ap, fmt);
 
 	if ((c->warn && (quiet < 1))
 	    || (c->error && (quiet < 2))) {
-		fprintf(stderr, "%s (%s): ",
+		fprintf(stderr, "%s: %s (%s): ",
+			strcmp(dti->outname, "-") ? dti->outname : "<stdout>",
 			(c->error) ? "ERROR" : "Warning", c->name);
 		vfprintf(stderr, fmt, ap);
 		fprintf(stderr, "\n");
@@ -116,34 +89,28 @@ static inline void check_msg(struct check *c, const char *fmt, ...)
 	va_end(ap);
 }
 
-#define FAIL(c, ...) \
-	do { \
-		TRACE((c), "\t\tFAILED at %s:%d", __FILE__, __LINE__); \
-		(c)->status = FAILED; \
-		check_msg((c), __VA_ARGS__); \
+#define FAIL(c, dti, ...)						\
+	do {								\
+		TRACE((c), "\t\tFAILED at %s:%d", __FILE__, __LINE__);	\
+		(c)->status = FAILED;					\
+		check_msg((c), dti, __VA_ARGS__);			\
 	} while (0)
 
-static void check_nodes_props(struct check *c, struct node *dt, struct node *node)
+static void check_nodes_props(struct check *c, struct dt_info *dti, struct node *node)
 {
 	struct node *child;
-	struct property *prop;
 
 	TRACE(c, "%s", node->fullpath);
-	if (c->node_fn)
-		c->node_fn(c, dt, node);
-
-	if (c->prop_fn)
-		for_each_property(node, prop) {
-			TRACE(c, "%s\t'%s'", node->fullpath, prop->name);
-			c->prop_fn(c, dt, node, prop);
-		}
+	if (c->fn)
+		c->fn(c, dti, node);
 
 	for_each_child(node, child)
-		check_nodes_props(c, dt, child);
+		check_nodes_props(c, dti, child);
 }
 
-static bool run_check(struct check *c, struct node *dt)
+static bool run_check(struct check *c, struct dt_info *dti)
 {
+	struct node *dt = dti->dt;
 	bool error = false;
 	int i;
 
@@ -156,10 +123,10 @@ static bool run_check(struct check *c, struct node *dt)
 
 	for (i = 0; i < c->num_prereqs; i++) {
 		struct check *prq = c->prereq[i];
-		error = error || run_check(prq, dt);
+		error = error || run_check(prq, dti);
 		if (prq->status != PASSED) {
 			c->status = PREREQ;
-			check_msg(c, "Failed prerequisite '%s'",
+			check_msg(c, dti, "Failed prerequisite '%s'",
 				  c->prereq[i]->name);
 		}
 	}
@@ -167,11 +134,8 @@ static bool run_check(struct check *c, struct node *dt)
 	if (c->status != UNCHECKED)
 		goto out;
 
-	if (c->node_fn || c->prop_fn)
-		check_nodes_props(c, dt, dt);
+	check_nodes_props(c, dti, dt);
 
-	if (c->tree_fn)
-		c->tree_fn(c, dt);
 	if (c->status == UNCHECKED)
 		c->status = PASSED;
 
@@ -189,13 +153,14 @@ out:
  */
 
 /* A check which always fails, for testing purposes only */
-static inline void check_always_fail(struct check *c, struct node *dt)
+static inline void check_always_fail(struct check *c, struct dt_info *dti,
+				     struct node *node)
 {
-	FAIL(c, "always_fail check");
+	FAIL(c, dti, "always_fail check");
 }
-TREE_CHECK(always_fail, NULL);
+CHECK(always_fail, check_always_fail, NULL);
 
-static void check_is_string(struct check *c, struct node *root,
+static void check_is_string(struct check *c, struct dt_info *dti,
 			    struct node *node)
 {
 	struct property *prop;
@@ -206,15 +171,15 @@ static void check_is_string(struct check *c, struct node *root,
 		return; /* Not present, assumed ok */
 
 	if (!data_is_one_string(prop->val))
-		FAIL(c, "\"%s\" property in %s is not a string",
+		FAIL(c, dti, "\"%s\" property in %s is not a string",
 		     propname, node->fullpath);
 }
 #define WARNING_IF_NOT_STRING(nm, propname) \
-	WARNING(nm, NULL, check_is_string, NULL, (propname))
+	WARNING(nm, check_is_string, (propname))
 #define ERROR_IF_NOT_STRING(nm, propname) \
-	ERROR(nm, NULL, check_is_string, NULL, (propname))
+	ERROR(nm, check_is_string, (propname))
 
-static void check_is_cell(struct check *c, struct node *root,
+static void check_is_cell(struct check *c, struct dt_info *dti,
 			  struct node *node)
 {
 	struct property *prop;
@@ -225,19 +190,19 @@ static void check_is_cell(struct check *c, struct node *root,
 		return; /* Not present, assumed ok */
 
 	if (prop->val.len != sizeof(cell_t))
-		FAIL(c, "\"%s\" property in %s is not a single cell",
+		FAIL(c, dti, "\"%s\" property in %s is not a single cell",
 		     propname, node->fullpath);
 }
 #define WARNING_IF_NOT_CELL(nm, propname) \
-	WARNING(nm, NULL, check_is_cell, NULL, (propname))
+	WARNING(nm, check_is_cell, (propname))
 #define ERROR_IF_NOT_CELL(nm, propname) \
-	ERROR(nm, NULL, check_is_cell, NULL, (propname))
+	ERROR(nm, check_is_cell, (propname))
 
 /*
  * Structural check functions
  */
 
-static void check_duplicate_node_names(struct check *c, struct node *dt,
+static void check_duplicate_node_names(struct check *c, struct dt_info *dti,
 				       struct node *node)
 {
 	struct node *child, *child2;
@@ -247,12 +212,12 @@ static void check_duplicate_node_names(struct check *c, struct node *dt,
 		     child2;
 		     child2 = child2->next_sibling)
 			if (streq(child->name, child2->name))
-				FAIL(c, "Duplicate node name %s",
+				FAIL(c, dti, "Duplicate node name %s",
 				     child->fullpath);
 }
-NODE_ERROR(duplicate_node_names, NULL);
+ERROR(duplicate_node_names, check_duplicate_node_names, NULL);
 
-static void check_duplicate_property_names(struct check *c, struct node *dt,
+static void check_duplicate_property_names(struct check *c, struct dt_info *dti,
 					   struct node *node)
 {
 	struct property *prop, *prop2;
@@ -262,40 +227,52 @@ static void check_duplicate_property_names(struct check *c, struct node *dt,
 			if (prop2->deleted)
 				continue;
 			if (streq(prop->name, prop2->name))
-				FAIL(c, "Duplicate property name %s in %s",
+				FAIL(c, dti, "Duplicate property name %s in %s",
 				     prop->name, node->fullpath);
 		}
 	}
 }
-NODE_ERROR(duplicate_property_names, NULL);
+ERROR(duplicate_property_names, check_duplicate_property_names, NULL);
 
 #define LOWERCASE	"abcdefghijklmnopqrstuvwxyz"
 #define UPPERCASE	"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
 #define DIGITS		"0123456789"
 #define PROPNODECHARS	LOWERCASE UPPERCASE DIGITS ",._+*#?-"
+#define PROPNODECHARSSTRICT	LOWERCASE UPPERCASE DIGITS ",-"
 
-static void check_node_name_chars(struct check *c, struct node *dt,
+static void check_node_name_chars(struct check *c, struct dt_info *dti,
 				  struct node *node)
 {
 	int n = strspn(node->name, c->data);
 
 	if (n < strlen(node->name))
-		FAIL(c, "Bad character '%c' in node %s",
+		FAIL(c, dti, "Bad character '%c' in node %s",
 		     node->name[n], node->fullpath);
 }
-NODE_ERROR(node_name_chars, PROPNODECHARS "@");
+ERROR(node_name_chars, check_node_name_chars, PROPNODECHARS "@");
 
-static void check_node_name_format(struct check *c, struct node *dt,
+static void check_node_name_chars_strict(struct check *c, struct dt_info *dti,
+					 struct node *node)
+{
+	int n = strspn(node->name, c->data);
+
+	if (n < node->basenamelen)
+		FAIL(c, dti, "Character '%c' not recommended in node %s",
+		     node->name[n], node->fullpath);
+}
+CHECK(node_name_chars_strict, check_node_name_chars_strict, PROPNODECHARSSTRICT);
+
+static void check_node_name_format(struct check *c, struct dt_info *dti,
 				   struct node *node)
 {
 	if (strchr(get_unitname(node), '@'))
-		FAIL(c, "Node %s has multiple '@' characters in name",
+		FAIL(c, dti, "Node %s has multiple '@' characters in name",
 		     node->fullpath);
 }
-NODE_ERROR(node_name_format, NULL, &node_name_chars);
+ERROR(node_name_format, check_node_name_format, NULL, &node_name_chars);
 
-static void check_unit_address_vs_reg(struct check *c, struct node *dt,
-			     struct node *node)
+static void check_unit_address_vs_reg(struct check *c, struct dt_info *dti,
+				      struct node *node)
 {
 	const char *unitname = get_unitname(node);
 	struct property *prop = get_property(node, "reg");
@@ -308,26 +285,62 @@ static void check_unit_address_vs_reg(struct check *c, struct node *dt,
 
 	if (prop) {
 		if (!unitname[0])
-			FAIL(c, "Node %s has a reg or ranges property, but no unit name",
+			FAIL(c, dti, "Node %s has a reg or ranges property, but no unit name",
 			    node->fullpath);
 	} else {
 		if (unitname[0])
-			FAIL(c, "Node %s has a unit name, but no reg property",
+			FAIL(c, dti, "Node %s has a unit name, but no reg property",
 			    node->fullpath);
 	}
 }
-NODE_WARNING(unit_address_vs_reg, NULL);
+WARNING(unit_address_vs_reg, check_unit_address_vs_reg, NULL);
+
+static void check_property_name_chars(struct check *c, struct dt_info *dti,
+				      struct node *node)
+{
+	struct property *prop;
+
+	for_each_property(node, prop) {
+		int n = strspn(prop->name, c->data);
+
+		if (n < strlen(prop->name))
+			FAIL(c, dti, "Bad character '%c' in property name \"%s\", node %s",
+			     prop->name[n], prop->name, node->fullpath);
+	}
+}
+ERROR(property_name_chars, check_property_name_chars, PROPNODECHARS);
 
-static void check_property_name_chars(struct check *c, struct node *dt,
-				      struct node *node, struct property *prop)
+static void check_property_name_chars_strict(struct check *c,
+					     struct dt_info *dti,
+					     struct node *node)
 {
-	int n = strspn(prop->name, c->data);
+	struct property *prop;
+
+	for_each_property(node, prop) {
+		const char *name = prop->name;
+		int n = strspn(name, c->data);
+
+		if (n == strlen(prop->name))
+			continue;
+
+		/* Certain names are whitelisted */
+		if (streq(name, "device_type"))
+			continue;
 
-	if (n < strlen(prop->name))
-		FAIL(c, "Bad character '%c' in property name \"%s\", node %s",
-		     prop->name[n], prop->name, node->fullpath);
+		/*
+		 * # is only allowed at the beginning of property names not counting
+		 * the vendor prefix.
+		 */
+		if (name[n] == '#' && ((n == 0) || (name[n-1] == ','))) {
+			name += n + 1;
+			n = strspn(name, c->data);
+		}
+		if (n < strlen(name))
+			FAIL(c, dti, "Character '%c' not recommended in property name \"%s\", node %s",
+			     name[n], prop->name, node->fullpath);
+	}
 }
-PROP_ERROR(property_name_chars, PROPNODECHARS);
+CHECK(property_name_chars_strict, check_property_name_chars_strict, PROPNODECHARSSTRICT);
 
 #define DESCLABEL_FMT	"%s%s%s%s%s"
 #define DESCLABEL_ARGS(node,prop,mark)		\
@@ -336,10 +349,11 @@ PROP_ERROR(property_name_chars, PROPNODECHARS);
 	((prop) ? (prop)->name : ""), \
 	((prop) ? "' in " : ""), (node)->fullpath
 
-static void check_duplicate_label(struct check *c, struct node *dt,
+static void check_duplicate_label(struct check *c, struct dt_info *dti,
 				  const char *label, struct node *node,
 				  struct property *prop, struct marker *mark)
 {
+	struct node *dt = dti->dt;
 	struct node *othernode = NULL;
 	struct property *otherprop = NULL;
 	struct marker *othermark = NULL;
@@ -356,50 +370,49 @@ static void check_duplicate_label(struct check *c, struct node *dt,
 		return;
 
 	if ((othernode != node) || (otherprop != prop) || (othermark != mark))
-		FAIL(c, "Duplicate label '%s' on " DESCLABEL_FMT
+		FAIL(c, dti, "Duplicate label '%s' on " DESCLABEL_FMT
 		     " and " DESCLABEL_FMT,
 		     label, DESCLABEL_ARGS(node, prop, mark),
 		     DESCLABEL_ARGS(othernode, otherprop, othermark));
 }
 
-static void check_duplicate_label_node(struct check *c, struct node *dt,
+static void check_duplicate_label_node(struct check *c, struct dt_info *dti,
 				       struct node *node)
 {
 	struct label *l;
+	struct property *prop;
 
 	for_each_label(node->labels, l)
-		check_duplicate_label(c, dt, l->label, node, NULL, NULL);
-}
-static void check_duplicate_label_prop(struct check *c, struct node *dt,
-				       struct node *node, struct property *prop)
-{
-	struct marker *m = prop->val.markers;
-	struct label *l;
+		check_duplicate_label(c, dti, l->label, node, NULL, NULL);
 
-	for_each_label(prop->labels, l)
-		check_duplicate_label(c, dt, l->label, node, prop, NULL);
+	for_each_property(node, prop) {
+		struct marker *m = prop->val.markers;
 
-	for_each_marker_of_type(m, LABEL)
-		check_duplicate_label(c, dt, m->ref, node, prop, m);
+		for_each_label(prop->labels, l)
+			check_duplicate_label(c, dti, l->label, node, prop, NULL);
+
+		for_each_marker_of_type(m, LABEL)
+			check_duplicate_label(c, dti, m->ref, node, prop, m);
+	}
 }
-ERROR(duplicate_label, NULL, check_duplicate_label_node,
-      check_duplicate_label_prop, NULL);
+ERROR(duplicate_label, check_duplicate_label_node, NULL);
 
-static void check_explicit_phandles(struct check *c, struct node *root,
-				    struct node *node, struct property *prop)
+static cell_t check_phandle_prop(struct check *c, struct dt_info *dti,
+				 struct node *node, const char *propname)
 {
+	struct node *root = dti->dt;
+	struct property *prop;
 	struct marker *m;
-	struct node *other;
 	cell_t phandle;
 
-	if (!streq(prop->name, "phandle")
-	    && !streq(prop->name, "linux,phandle"))
-		return;
+	prop = get_property(node, propname);
+	if (!prop)
+		return 0;
 
 	if (prop->val.len != sizeof(cell_t)) {
-		FAIL(c, "%s has bad length (%d) %s property",
+		FAIL(c, dti, "%s has bad length (%d) %s property",
 		     node->fullpath, prop->val.len, prop->name);
-		return;
+		return 0;
 	}
 
 	m = prop->val.markers;
@@ -409,42 +422,65 @@ static void check_explicit_phandles(struct check *c, struct node *root,
 			/* "Set this node's phandle equal to some
 			 * other node's phandle".  That's nonsensical
 			 * by construction. */ {
-			FAIL(c, "%s in %s is a reference to another node",
+			FAIL(c, dti, "%s in %s is a reference to another node",
 			     prop->name, node->fullpath);
-			return;
 		}
 		/* But setting this node's phandle equal to its own
 		 * phandle is allowed - that means allocate a unique
 		 * phandle for this node, even if it's not otherwise
 		 * referenced.  The value will be filled in later, so
-		 * no further checking for now. */
-		return;
+		 * we treat it as having no phandle data for now. */
+		return 0;
 	}
 
 	phandle = propval_cell(prop);
 
 	if ((phandle == 0) || (phandle == -1)) {
-		FAIL(c, "%s has bad value (0x%x) in %s property",
+		FAIL(c, dti, "%s has bad value (0x%x) in %s property",
 		     node->fullpath, phandle, prop->name);
-		return;
+		return 0;
 	}
 
-	if (node->phandle && (node->phandle != phandle))
-		FAIL(c, "%s has %s property which replaces existing phandle information",
-		     node->fullpath, prop->name);
+	return phandle;
+}
+
+static void check_explicit_phandles(struct check *c, struct dt_info *dti,
+				    struct node *node)
+{
+	struct node *root = dti->dt;
+	struct node *other;
+	cell_t phandle, linux_phandle;
+
+	/* Nothing should have assigned phandles yet */
+	assert(!node->phandle);
+
+	phandle = check_phandle_prop(c, dti, node, "phandle");
+
+	linux_phandle = check_phandle_prop(c, dti, node, "linux,phandle");
+
+	if (!phandle && !linux_phandle)
+		/* No valid phandles; nothing further to check */
+		return;
+
+	if (linux_phandle && phandle && (phandle != linux_phandle))
+		FAIL(c, dti, "%s has mismatching 'phandle' and 'linux,phandle'"
+		     " properties", node->fullpath);
+
+	if (linux_phandle && !phandle)
+		phandle = linux_phandle;
 
 	other = get_node_by_phandle(root, phandle);
 	if (other && (other != node)) {
-		FAIL(c, "%s has duplicated phandle 0x%x (seen before at %s)",
+		FAIL(c, dti, "%s has duplicated phandle 0x%x (seen before at %s)",
 		     node->fullpath, phandle, other->fullpath);
 		return;
 	}
 
 	node->phandle = phandle;
 }
-PROP_ERROR(explicit_phandles, NULL);
+ERROR(explicit_phandles, check_explicit_phandles, NULL);
 
-static void check_name_properties(struct check *c, struct node *root,
+static void check_name_properties(struct check *c, struct dt_info *dti,
 				  struct node *node)
 {
 	struct property **pp, *prop = NULL;
@@ -460,7 +496,7 @@ static void check_name_properties(struct check *c, struct node *root,
 
 	if ((prop->val.len != node->basenamelen+1)
 	    || (memcmp(prop->val.val, node->name, node->basenamelen) != 0)) {
-		FAIL(c, "\"name\" property in %s is incorrect (\"%s\" instead"
+		FAIL(c, dti, "\"name\" property in %s is incorrect (\"%s\" instead"
 		     " of base node name)", node->fullpath, prop->val.val);
 	} else {
 		/* The name property is correct, and therefore redundant.
@@ -472,60 +508,73 @@ static void check_name_properties(struct check *c, struct node *root,
 	}
 }
 ERROR_IF_NOT_STRING(name_is_string, "name");
-NODE_ERROR(name_properties, NULL, &name_is_string);
+ERROR(name_properties, check_name_properties, NULL, &name_is_string);
 
 /*
  * Reference fixup functions
  */
 
-static void fixup_phandle_references(struct check *c, struct node *dt,
-				     struct node *node, struct property *prop)
+static void fixup_phandle_references(struct check *c, struct dt_info *dti,
+				     struct node *node)
 {
-	struct marker *m = prop->val.markers;
-	struct node *refnode;
-	cell_t phandle;
+	struct node *dt = dti->dt;
+	struct property *prop;
 
-	for_each_marker_of_type(m, REF_PHANDLE) {
-		assert(m->offset + sizeof(cell_t) <= prop->val.len);
+	for_each_property(node, prop) {
+		struct marker *m = prop->val.markers;
+		struct node *refnode;
+		cell_t phandle;
+
+		for_each_marker_of_type(m, REF_PHANDLE) {
+			assert(m->offset + sizeof(cell_t) <= prop->val.len);
+
+			refnode = get_node_by_ref(dt, m->ref);
+			if (! refnode) {
+				if (!(dti->dtsflags & DTSF_PLUGIN))
+					FAIL(c, dti, "Reference to non-existent node or "
+							"label \"%s\"\n", m->ref);
+				else /* mark the entry as unresolved */
+					*((fdt32_t *)(prop->val.val + m->offset)) =
+						cpu_to_fdt32(0xffffffff);
+				continue;
+			}
 
-		refnode = get_node_by_ref(dt, m->ref);
-		if (! refnode) {
-			FAIL(c, "Reference to non-existent node or label \"%s\"\n",
-			     m->ref);
-			continue;
+			phandle = get_node_phandle(dt, refnode);
+			*((fdt32_t *)(prop->val.val + m->offset)) = cpu_to_fdt32(phandle);
 		}
-
-		phandle = get_node_phandle(dt, refnode);
-		*((cell_t *)(prop->val.val + m->offset)) = cpu_to_fdt32(phandle);
 	}
 }
-ERROR(phandle_references, NULL, NULL, fixup_phandle_references, NULL,
+ERROR(phandle_references, fixup_phandle_references, NULL,
       &duplicate_node_names, &explicit_phandles);
 
-static void fixup_path_references(struct check *c, struct node *dt,
-				  struct node *node, struct property *prop)
+static void fixup_path_references(struct check *c, struct dt_info *dti,
+				  struct node *node)
 {
-	struct marker *m = prop->val.markers;
-	struct node *refnode;
-	char *path;
+	struct node *dt = dti->dt;
+	struct property *prop;
+
+	for_each_property(node, prop) {
+		struct marker *m = prop->val.markers;
+		struct node *refnode;
+		char *path;
 
-	for_each_marker_of_type(m, REF_PATH) {
-		assert(m->offset <= prop->val.len);
+		for_each_marker_of_type(m, REF_PATH) {
+			assert(m->offset <= prop->val.len);
 
-		refnode = get_node_by_ref(dt, m->ref);
-		if (!refnode) {
-			FAIL(c, "Reference to non-existent node or label \"%s\"\n",
-			     m->ref);
-			continue;
-		}
+			refnode = get_node_by_ref(dt, m->ref);
+			if (!refnode) {
+				FAIL(c, dti, "Reference to non-existent node or label \"%s\"\n",
+				     m->ref);
+				continue;
+			}
 
-		path = refnode->fullpath;
-		prop->val = data_insert_at_marker(prop->val, m, path,
-						  strlen(path) + 1);
+			path = refnode->fullpath;
+			prop->val = data_insert_at_marker(prop->val, m, path,
+							  strlen(path) + 1);
+		}
 	}
 }
-ERROR(path_references, NULL, NULL, fixup_path_references, NULL,
-      &duplicate_node_names);
+ERROR(path_references, fixup_path_references, NULL, &duplicate_node_names);
 
 /*
  * Semantic checks
@@ -538,7 +587,7 @@ WARNING_IF_NOT_STRING(device_type_is_string, "device_type");
 WARNING_IF_NOT_STRING(model_is_string, "model");
 WARNING_IF_NOT_STRING(status_is_string, "status");
 
-static void fixup_addr_size_cells(struct check *c, struct node *dt,
+static void fixup_addr_size_cells(struct check *c, struct dt_info *dti,
 				  struct node *node)
 {
 	struct property *prop;
@@ -554,7 +603,7 @@ static void fixup_addr_size_cells(struct check *c, struct node *dt,
 	if (prop)
 		node->size_cells = propval_cell(prop);
 }
-WARNING(addr_size_cells, NULL, fixup_addr_size_cells, NULL, NULL,
+WARNING(addr_size_cells, fixup_addr_size_cells, NULL,
 	&address_cells_is_cell, &size_cells_is_cell);
 
 #define node_addr_cells(n) \
@@ -562,7 +611,7 @@ WARNING(addr_size_cells, NULL, fixup_addr_size_cells, NULL, NULL,
 #define node_size_cells(n) \
 	(((n)->size_cells == -1) ? 1 : (n)->size_cells)
 
-static void check_reg_format(struct check *c, struct node *dt,
+static void check_reg_format(struct check *c, struct dt_info *dti,
 			     struct node *node)
 {
 	struct property *prop;
@@ -573,25 +622,25 @@ static void check_reg_format(struct check *c, struct node *dt,
 		return; /* No "reg", that's fine */
 
 	if (!node->parent) {
-		FAIL(c, "Root node has a \"reg\" property");
+		FAIL(c, dti, "Root node has a \"reg\" property");
 		return;
 	}
 
 	if (prop->val.len == 0)
-		FAIL(c, "\"reg\" property in %s is empty", node->fullpath);
+		FAIL(c, dti, "\"reg\" property in %s is empty", node->fullpath);
 
 	addr_cells = node_addr_cells(node->parent);
 	size_cells = node_size_cells(node->parent);
 	entrylen = (addr_cells + size_cells) * sizeof(cell_t);
 
 	if (!entrylen || (prop->val.len % entrylen) != 0)
-		FAIL(c, "\"reg\" property in %s has invalid length (%d bytes) "
+		FAIL(c, dti, "\"reg\" property in %s has invalid length (%d bytes) "
 		     "(#address-cells == %d, #size-cells == %d)",
 		     node->fullpath, prop->val.len, addr_cells, size_cells);
 }
-NODE_WARNING(reg_format, NULL, &addr_size_cells);
+WARNING(reg_format, check_reg_format, NULL, &addr_size_cells);
 
-static void check_ranges_format(struct check *c, struct node *dt,
+static void check_ranges_format(struct check *c, struct dt_info *dti,
 				struct node *node)
 {
 	struct property *prop;
@@ -602,7 +651,7 @@ static void check_ranges_format(struct check *c, struct node *dt,
 		return;
 
 	if (!node->parent) {
-		FAIL(c, "Root node has a \"ranges\" property");
+		FAIL(c, dti, "Root node has a \"ranges\" property");
 		return;
 	}
 
@@ -614,28 +663,28 @@ static void check_ranges_format(struct check *c, struct node *dt,
 
 	if (prop->val.len == 0) {
 		if (p_addr_cells != c_addr_cells)
-			FAIL(c, "%s has empty \"ranges\" property but its "
+			FAIL(c, dti, "%s has empty \"ranges\" property but its "
 			     "#address-cells (%d) differs from %s (%d)",
 			     node->fullpath, c_addr_cells, node->parent->fullpath,
 			     p_addr_cells);
 		if (p_size_cells != c_size_cells)
-			FAIL(c, "%s has empty \"ranges\" property but its "
+			FAIL(c, dti, "%s has empty \"ranges\" property but its "
 			     "#size-cells (%d) differs from %s (%d)",
 			     node->fullpath, c_size_cells, node->parent->fullpath,
 			     p_size_cells);
 	} else if ((prop->val.len % entrylen) != 0) {
-		FAIL(c, "\"ranges\" property in %s has invalid length (%d bytes) "
+		FAIL(c, dti, "\"ranges\" property in %s has invalid length (%d bytes) "
 		     "(parent #address-cells == %d, child #address-cells == %d, "
 		     "#size-cells == %d)", node->fullpath, prop->val.len,
 		     p_addr_cells, c_addr_cells, c_size_cells);
 	}
 }
-NODE_WARNING(ranges_format, NULL, &addr_size_cells);
+WARNING(ranges_format, check_ranges_format, NULL, &addr_size_cells);
 
 /*
  * Style checks
  */
-static void check_avoid_default_addr_size(struct check *c, struct node *dt,
+static void check_avoid_default_addr_size(struct check *c, struct dt_info *dti,
 					  struct node *node)
 {
 	struct property *reg, *ranges;
@@ -650,31 +699,39 @@ static void check_avoid_default_addr_size(struct check *c, struct node *dt,
 		return;
 
 	if (node->parent->addr_cells == -1)
-		FAIL(c, "Relying on default #address-cells value for %s",
+		FAIL(c, dti, "Relying on default #address-cells value for %s",
 		     node->fullpath);
 
 	if (node->parent->size_cells == -1)
-		FAIL(c, "Relying on default #size-cells value for %s",
+		FAIL(c, dti, "Relying on default #size-cells value for %s",
 		     node->fullpath);
 }
-NODE_WARNING(avoid_default_addr_size, NULL, &addr_size_cells);
+WARNING(avoid_default_addr_size, check_avoid_default_addr_size, NULL,
+	&addr_size_cells);
 
 static void check_obsolete_chosen_interrupt_controller(struct check *c,
-						       struct node *dt)
+						       struct dt_info *dti,
+						       struct node *node)
 {
+	struct node *dt = dti->dt;
 	struct node *chosen;
 	struct property *prop;
 
+	if (node != dt)
+		return;
+
+
 	chosen = get_node_by_path(dt, "/chosen");
 	if (!chosen)
 		return;
 
 	prop = get_property(chosen, "interrupt-controller");
 	if (prop)
-		FAIL(c, "/chosen has obsolete \"interrupt-controller\" "
+		FAIL(c, dti, "/chosen has obsolete \"interrupt-controller\" "
 		     "property");
 }
-TREE_WARNING(obsolete_chosen_interrupt_controller, NULL);
+WARNING(obsolete_chosen_interrupt_controller,
+	check_obsolete_chosen_interrupt_controller, NULL);
 
 static struct check *check_table[] = {
 	&duplicate_node_names, &duplicate_property_names,
@@ -689,6 +746,9 @@ static struct check *check_table[] = {
 	&address_cells_is_cell, &size_cells_is_cell, &interrupt_cells_is_cell,
 	&device_type_is_string, &model_is_string, &status_is_string,
 
+	&property_name_chars_strict,
+	&node_name_chars_strict,
+
 	&addr_size_cells, &reg_format, &ranges_format,
 
 	&unit_address_vs_reg,
@@ -760,9 +820,8 @@ void parse_checks_option(bool warn, bool error, const char *arg)
 	die("Unrecognized check name \"%s\"\n", name);
 }
 
-void process_checks(bool force, struct boot_info *bi)
+void process_checks(bool force, struct dt_info *dti)
 {
-	struct node *dt = bi->dt;
 	int i;
 	int error = 0;
 
@@ -770,7 +829,7 @@ void process_checks(bool force, struct boot_info *bi)
 		struct check *c = check_table[i];
 
 		if (c->warn || c->error)
-			error = error || run_check(c, dt);
+			error = error || run_check(c, dti);
 	}
 
 	if (error) {
diff --git a/scripts/dtc/data.c b/scripts/dtc/data.c
index 8cae237..aa37a16 100644
--- a/scripts/dtc/data.c
+++ b/scripts/dtc/data.c
@@ -171,9 +171,9 @@ struct data data_merge(struct data d1, struct data d2)
 struct data data_append_integer(struct data d, uint64_t value, int bits)
 {
 	uint8_t value_8;
-	uint16_t value_16;
-	uint32_t value_32;
-	uint64_t value_64;
+	fdt16_t value_16;
+	fdt32_t value_32;
+	fdt64_t value_64;
 
 	switch (bits) {
 	case 8:
@@ -197,14 +197,14 @@ struct data data_append_integer(struct data d, uint64_t value, int bits)
 	}
 }
 
-struct data data_append_re(struct data d, const struct fdt_reserve_entry *re)
+struct data data_append_re(struct data d, uint64_t address, uint64_t size)
 {
-	struct fdt_reserve_entry bere;
+	struct fdt_reserve_entry re;
 
-	bere.address = cpu_to_fdt64(re->address);
-	bere.size = cpu_to_fdt64(re->size);
+	re.address = cpu_to_fdt64(address);
+	re.size = cpu_to_fdt64(size);
 
-	return data_append_data(d, &bere, sizeof(bere));
+	return data_append_data(d, &re, sizeof(re));
 }
 
 struct data data_append_cell(struct data d, cell_t word)
diff --git a/scripts/dtc/dtc-lexer.l b/scripts/dtc/dtc-lexer.l
index 790fbf6..fd825eb 100644
--- a/scripts/dtc/dtc-lexer.l
+++ b/scripts/dtc/dtc-lexer.l
@@ -62,7 +62,8 @@ static int dts_version = 1;
 
 static void push_input_file(const char *filename);
 static bool pop_input_file(void);
-static void lexical_error(const char *fmt, ...);
+static void PRINTF(1, 2) lexical_error(const char *fmt, ...);
+
 %}
 
 %%
@@ -121,6 +122,11 @@ static void lexical_error(const char *fmt, ...);
 			return DT_V1;
 		}
 
+<*>"/plugin/"	{
+			DPRINT("Keyword: /plugin/\n");
+			return DT_PLUGIN;
+		}
+
 <*>"/memreserve/"	{
 			DPRINT("Keyword: /memreserve/\n");
 			BEGIN_DEFAULT();
@@ -184,16 +190,16 @@ static void lexical_error(const char *fmt, ...);
 			if (d.len == 1) {
 				lexical_error("Empty character literal");
 				yylval.integer = 0;
-				return DT_CHAR_LITERAL;
-			}
-
-			yylval.integer = (unsigned char)d.val[0];
+			} else {
+				yylval.integer = (unsigned char)d.val[0];
 
-			if (d.len > 2)
-				lexical_error("Character literal has %d"
-					      " characters instead of 1",
-					      d.len - 1);
+				if (d.len > 2)
+					lexical_error("Character literal has %d"
+						      " characters instead of 1",
+						      d.len - 1);
+			}
 
+			data_free(d);
 			return DT_CHAR_LITERAL;
 		}
 
diff --git a/scripts/dtc/dtc-lexer.lex.c_shipped b/scripts/dtc/dtc-lexer.lex.c_shipped
index ba525c2..c37d7ff 100644
--- a/scripts/dtc/dtc-lexer.lex.c_shipped
+++ b/scripts/dtc/dtc-lexer.lex.c_shipped
@@ -8,8 +8,8 @@
 
 #define FLEX_SCANNER
 #define YY_FLEX_MAJOR_VERSION 2
-#define YY_FLEX_MINOR_VERSION 5
-#define YY_FLEX_SUBMINOR_VERSION 39
+#define YY_FLEX_MINOR_VERSION 6
+#define YY_FLEX_SUBMINOR_VERSION 1
 #if YY_FLEX_SUBMINOR_VERSION > 0
 #define FLEX_BETA
 #endif
@@ -88,25 +88,13 @@ typedef unsigned int flex_uint32_t;
 
 #endif /* ! FLEXINT_H */
 
-#ifdef __cplusplus
-
-/* The "const" storage-class-modifier is valid. */
-#define YY_USE_CONST
-
-#else	/* ! __cplusplus */
-
-/* C99 requires __STDC__ to be defined as 1. */
-#if defined (__STDC__)
-
-#define YY_USE_CONST
-
-#endif	/* defined (__STDC__) */
-#endif	/* ! __cplusplus */
-
-#ifdef YY_USE_CONST
+/* TODO: this is always defined, so inline it */
 #define yyconst const
+
+#if defined(__GNUC__) && __GNUC__ >= 3
+#define yynoreturn __attribute__((__noreturn__))
 #else
-#define yyconst
+#define yynoreturn
 #endif
 
 /* Returned upon end-of-file. */
@@ -167,7 +155,7 @@ typedef struct yy_buffer_state *YY_BUFFER_STATE;
 typedef size_t yy_size_t;
 #endif
 
-extern yy_size_t yyleng;
+extern int yyleng;
 
 extern FILE *yyin, *yyout;
 
@@ -206,12 +194,12 @@ struct yy_buffer_state
 	/* Size of input buffer in bytes, not including room for EOB
 	 * characters.
 	 */
-	yy_size_t yy_buf_size;
+	int yy_buf_size;
 
 	/* Number of characters read into yy_ch_buf, not including EOB
 	 * characters.
 	 */
-	yy_size_t yy_n_chars;
+	int yy_n_chars;
 
 	/* Whether we "own" the buffer - i.e., we know we created it,
 	 * and can realloc() it to grow it, and should free() it to
@@ -234,7 +222,7 @@ struct yy_buffer_state
 
     int yy_bs_lineno; /**< The line count. */
     int yy_bs_column; /**< The column count. */
-    
+
 	/* Whether to try to fill the input buffer when we reach the
 	 * end of it.
 	 */
@@ -262,7 +250,7 @@ struct yy_buffer_state
 /* Stack of input buffers. */
 static size_t yy_buffer_stack_top = 0; /**< index of top of stack. */
 static size_t yy_buffer_stack_max = 0; /**< capacity of stack. */
-static YY_BUFFER_STATE * yy_buffer_stack = 0; /**< Stack as an array. */
+static YY_BUFFER_STATE * yy_buffer_stack = NULL; /**< Stack as an array. */
 
 /* We provide macros for accessing buffer states in case in the
  * future we want to put the buffer states in a more general
@@ -281,11 +269,11 @@ static YY_BUFFER_STATE * yy_buffer_stack = 0; /**< Stack as an array. */
 
 /* yy_hold_char holds the character lost when yytext is formed. */
 static char yy_hold_char;
-static yy_size_t yy_n_chars;		/* number of characters read into yy_ch_buf */
-yy_size_t yyleng;
+static int yy_n_chars;		/* number of characters read into yy_ch_buf */
+int yyleng;
 
 /* Points to current character in buffer. */
-static char *yy_c_buf_p = (char *) 0;
+static char *yy_c_buf_p = NULL;
 static int yy_init = 0;		/* whether we need to initialize */
 static int yy_start = 0;	/* start state number */
 
@@ -310,7 +298,7 @@ static void yy_init_buffer (YY_BUFFER_STATE b,FILE *file  );
 
 YY_BUFFER_STATE yy_scan_buffer (char *base,yy_size_t size  );
 YY_BUFFER_STATE yy_scan_string (yyconst char *yy_str  );
-YY_BUFFER_STATE yy_scan_bytes (yyconst char *bytes,yy_size_t len  );
+YY_BUFFER_STATE yy_scan_bytes (yyconst char *bytes,int len  );
 
 void *yyalloc (yy_size_t  );
 void *yyrealloc (void *,yy_size_t  );
@@ -342,12 +330,12 @@ void yyfree (void *  );
 
 /* Begin user sect3 */
 
-#define yywrap() 1
+#define yywrap() (/*CONSTCOND*/1)
 #define YY_SKIP_YYWRAP
 
 typedef unsigned char YY_CHAR;
 
-FILE *yyin = (FILE *) 0, *yyout = (FILE *) 0;
+FILE *yyin = NULL, *yyout = NULL;
 
 typedef int yy_state_type;
 
@@ -356,25 +344,28 @@ extern int yylineno;
 int yylineno = 1;
 
 extern char *yytext;
+#ifdef yytext_ptr
+#undef yytext_ptr
+#endif
 #define yytext_ptr yytext
 
 static yy_state_type yy_get_previous_state (void );
 static yy_state_type yy_try_NUL_trans (yy_state_type current_state  );
 static int yy_get_next_buffer (void );
-static void yy_fatal_error (yyconst char msg[]  );
+static void yynoreturn yy_fatal_error (yyconst char* msg  );
 
 /* Done after the current pattern has been matched and before the
  * corresponding action - sets up yytext.
  */
 #define YY_DO_BEFORE_ACTION \
 	(yytext_ptr) = yy_bp; \
-	yyleng = (size_t) (yy_cp - yy_bp); \
+	yyleng = (int) (yy_cp - yy_bp); \
 	(yy_hold_char) = *yy_cp; \
 	*yy_cp = '\0'; \
 	(yy_c_buf_p) = yy_cp;
 
-#define YY_NUM_RULES 30
-#define YY_END_OF_BUFFER 31
+#define YY_NUM_RULES 31
+#define YY_END_OF_BUFFER 32
 /* This struct is not used in this scanner,
    but its presence is necessary. */
 struct yy_trans_info
@@ -382,28 +373,29 @@ struct yy_trans_info
 	flex_int32_t yy_verify;
 	flex_int32_t yy_nxt;
 	};
-static yyconst flex_int16_t yy_accept[159] =
+static yyconst flex_int16_t yy_accept[166] =
     {   0,
-        0,    0,    0,    0,    0,    0,    0,    0,   31,   29,
-       18,   18,   29,   29,   29,   29,   29,   29,   29,   29,
-       29,   29,   29,   29,   29,   29,   15,   16,   16,   29,
-       16,   10,   10,   18,   26,    0,    3,    0,   27,   12,
-        0,    0,   11,    0,    0,    0,    0,    0,    0,    0,
-       21,   23,   25,   24,   22,    0,    9,   28,    0,    0,
-        0,   14,   14,   16,   16,   16,   10,   10,   10,    0,
-       12,    0,   11,    0,    0,    0,   20,    0,    0,    0,
-        0,    0,    0,    0,    0,   16,   10,   10,   10,    0,
-       13,   19,    0,    0,    0,    0,    0,    0,    0,    0,
-
-        0,   16,    0,    0,    0,    0,    0,    0,    0,    0,
-        0,   16,    6,    0,    0,    0,    0,    0,    0,    2,
-        0,    0,    0,    0,    0,    0,    0,    0,    4,   17,
-        0,    0,    2,    0,    0,    0,    0,    0,    0,    0,
-        0,    0,    0,    0,    0,    1,    0,    0,    0,    0,
-        5,    8,    0,    0,    0,    0,    7,    0
+        0,    0,    0,    0,    0,    0,    0,    0,   32,   30,
+       19,   19,   30,   30,   30,   30,   30,   30,   30,   30,
+       30,   30,   30,   30,   30,   30,   16,   17,   17,   30,
+       17,   11,   11,   19,   27,    0,    3,    0,   28,   13,
+        0,    0,   12,    0,    0,    0,    0,    0,    0,    0,
+        0,   22,   24,   26,   25,   23,    0,   10,   29,    0,
+        0,    0,   15,   15,   17,   17,   17,   11,   11,   11,
+        0,   13,    0,   12,    0,    0,    0,   21,    0,    0,
+        0,    0,    0,    0,    0,    0,    0,   17,   11,   11,
+       11,    0,   14,   20,    0,    0,    0,    0,    0,    0,
+
+        0,    0,    0,    0,   17,    0,    0,    0,    0,    0,
+        0,    0,    0,    0,    0,   17,    7,    0,    0,    0,
+        0,    0,    0,    0,    2,    0,    0,    0,    0,    0,
+        0,    0,    0,    0,    4,   18,    0,    0,    5,    2,
+        0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
+        0,    0,    1,    0,    0,    0,    0,    6,    9,    0,
+        0,    0,    0,    8,    0
     } ;
 
-static yyconst flex_int32_t yy_ec[256] =
+static yyconst YY_CHAR yy_ec[256] =
     {   0,
         1,    1,    1,    1,    1,    1,    1,    1,    2,    3,
         4,    4,    4,    1,    1,    1,    1,    1,    1,    1,
@@ -416,9 +408,9 @@ static yyconst flex_int32_t yy_ec[256] =
        22,   22,   22,   22,   24,   22,   22,   25,   22,   22,
         1,   26,   27,    1,   22,    1,   21,   28,   29,   30,
 
-       31,   21,   22,   22,   32,   22,   22,   33,   34,   35,
-       36,   37,   22,   38,   39,   40,   41,   42,   22,   25,
-       43,   22,   44,   45,   46,    1,    1,    1,    1,    1,
+       31,   21,   32,   22,   33,   22,   22,   34,   35,   36,
+       37,   38,   22,   39,   40,   41,   42,   43,   22,   25,
+       44,   22,   45,   46,   47,    1,    1,    1,    1,    1,
         1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
         1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
         1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
@@ -435,163 +427,165 @@ static yyconst flex_int32_t yy_ec[256] =
         1,    1,    1,    1,    1
     } ;
 
-static yyconst flex_int32_t yy_meta[47] =
+static yyconst YY_CHAR yy_meta[48] =
     {   0,
         1,    1,    1,    1,    1,    1,    2,    3,    1,    2,
         2,    2,    4,    5,    5,    5,    6,    1,    1,    1,
         7,    8,    8,    8,    8,    1,    1,    7,    7,    7,
         7,    8,    8,    8,    8,    8,    8,    8,    8,    8,
-        8,    8,    8,    3,    1,    4
+        8,    8,    8,    8,    3,    1,    4
     } ;
 
-static yyconst flex_int16_t yy_base[173] =
+static yyconst flex_uint16_t yy_base[180] =
     {   0,
-        0,  383,   34,  382,   65,  381,   37,  105,  387,  391,
-       54,  111,  367,  110,  109,  109,  112,   41,  366,  104,
-      367,  338,  124,  117,    0,  144,  391,    0,  121,    0,
-      135,  155,  140,  179,  391,  160,  391,  379,  391,    0,
-      368,  141,  391,  167,  370,  376,  346,  103,  342,  345,
-      391,  391,  391,  391,  391,  358,  391,  391,  175,  342,
-      338,  391,  355,    0,  185,  339,  184,  347,  346,    0,
-        0,  322,  175,  357,  175,  363,  352,  324,  330,  323,
-      332,  326,  201,  324,  329,  322,  391,  333,  181,  309,
-      391,  341,  340,  313,  320,  338,  178,  311,  146,  317,
-
-      314,  315,  335,  331,  303,  300,  309,  299,  308,  188,
-      336,  335,  391,  305,  320,  281,  283,  271,  203,  288,
-      281,  271,  266,  264,  245,  242,  208,  104,  391,  391,
-      244,  218,  204,  219,  206,  224,  201,  212,  204,  229,
-      215,  208,  207,  200,  219,  391,  233,  221,  200,  181,
-      391,  391,  149,  122,   86,   41,  391,  391,  245,  251,
-      259,  263,  267,  273,  280,  284,  292,  300,  304,  310,
-      318,  326
+        0,  393,   35,  392,   66,  391,   38,  107,  397,  401,
+       55,  113,  377,  112,  111,  111,  114,   42,  376,  106,
+      377,  347,  126,  120,    0,  147,  401,    0,  124,    0,
+      137,  158,  170,  163,  401,  153,  401,  389,  401,    0,
+      378,  120,  401,  131,  380,  386,  355,  139,  351,  355,
+      351,  401,  401,  401,  401,  401,  367,  401,  401,  185,
+      350,  346,  401,  364,    0,  185,  347,  189,  356,  355,
+        0,    0,  330,  180,  366,  141,  372,  361,  332,  338,
+      331,  341,  334,  326,  205,  331,  337,  329,  401,  341,
+      167,  316,  401,  349,  348,  320,  328,  346,  180,  318,
+
+      324,  209,  324,  320,  322,  342,  338,  309,  306,  315,
+      305,  315,  312,  192,  342,  341,  401,  293,  306,  282,
+      268,  252,  255,  203,  285,  282,  272,  268,  252,  233,
+      232,  239,  208,  107,  401,  401,  238,  211,  401,  211,
+      212,  208,  228,  203,  215,  207,  233,  222,  212,  211,
+      203,  227,  401,  237,  225,  204,  185,  401,  401,  149,
+      128,   88,   42,  401,  401,  253,  259,  267,  271,  275,
+      281,  288,  292,  300,  308,  312,  318,  326,  334
     } ;
 
-static yyconst flex_int16_t yy_def[173] =
+static yyconst flex_int16_t yy_def[180] =
     {   0,
-      158,    1,    1,    3,  158,    5,    1,    1,  158,  158,
-      158,  158,  158,  159,  160,  161,  158,  158,  158,  158,
-      162,  158,  158,  158,  163,  162,  158,  164,  165,  164,
-      164,  158,  158,  158,  158,  159,  158,  159,  158,  166,
-      158,  161,  158,  161,  167,  168,  158,  158,  158,  158,
-      158,  158,  158,  158,  158,  162,  158,  158,  158,  158,
-      158,  158,  162,  164,  165,  164,  158,  158,  158,  169,
-      166,  170,  161,  167,  167,  168,  158,  158,  158,  158,
-      158,  158,  158,  158,  158,  164,  158,  158,  169,  170,
-      158,  158,  158,  158,  158,  158,  158,  158,  158,  158,
-
-      158,  164,  158,  158,  158,  158,  158,  158,  158,  171,
-      158,  164,  158,  158,  158,  158,  158,  158,  171,  158,
-      171,  158,  158,  158,  158,  158,  158,  158,  158,  158,
-      158,  158,  158,  158,  158,  158,  158,  158,  158,  158,
-      172,  158,  158,  158,  172,  158,  172,  158,  158,  158,
-      158,  158,  158,  158,  158,  158,  158,    0,  158,  158,
-      158,  158,  158,  158,  158,  158,  158,  158,  158,  158,
-      158,  158
+      165,    1,    1,    3,  165,    5,    1,    1,  165,  165,
+      165,  165,  165,  166,  167,  168,  165,  165,  165,  165,
+      169,  165,  165,  165,  170,  169,  165,  171,  172,  171,
+      171,  165,  165,  165,  165,  166,  165,  166,  165,  173,
+      165,  168,  165,  168,  174,  175,  165,  165,  165,  165,
+      165,  165,  165,  165,  165,  165,  169,  165,  165,  165,
+      165,  165,  165,  169,  171,  172,  171,  165,  165,  165,
+      176,  173,  177,  168,  174,  174,  175,  165,  165,  165,
+      165,  165,  165,  165,  165,  165,  165,  171,  165,  165,
+      176,  177,  165,  165,  165,  165,  165,  165,  165,  165,
+
+      165,  165,  165,  165,  171,  165,  165,  165,  165,  165,
+      165,  165,  165,  178,  165,  171,  165,  165,  165,  165,
+      165,  165,  165,  178,  165,  178,  165,  165,  165,  165,
+      165,  165,  165,  165,  165,  165,  165,  165,  165,  165,
+      165,  165,  165,  165,  165,  165,  165,  179,  165,  165,
+      165,  179,  165,  179,  165,  165,  165,  165,  165,  165,
+      165,  165,  165,  165,    0,  165,  165,  165,  165,  165,
+      165,  165,  165,  165,  165,  165,  165,  165,  165
     } ;
 
-static yyconst flex_int16_t yy_nxt[438] =
+static yyconst flex_uint16_t yy_nxt[449] =
     {   0,
        10,   11,   12,   11,   13,   14,   10,   15,   16,   10,
        10,   10,   17,   10,   10,   10,   10,   18,   19,   20,
        21,   21,   21,   21,   21,   10,   10,   21,   21,   21,
        21,   21,   21,   21,   21,   21,   21,   21,   21,   21,
-       21,   21,   21,   10,   22,   10,   24,   25,   25,   25,
-       32,   33,   33,  157,   26,   34,   34,   34,   51,   52,
-       27,   26,   26,   26,   26,   10,   11,   12,   11,   13,
-       14,   28,   15,   16,   28,   28,   28,   24,   28,   28,
-       28,   10,   18,   19,   20,   29,   29,   29,   29,   29,
-       30,   10,   29,   29,   29,   29,   29,   29,   29,   29,
-
-       29,   29,   29,   29,   29,   29,   29,   29,   10,   22,
-       10,   23,   34,   34,   34,   37,   39,   43,   32,   33,
-       33,   45,   54,   55,   46,   59,   45,   64,  156,   46,
-       64,   64,   64,   79,   44,   38,   59,   57,  134,   47,
-      135,   48,   80,   49,   47,   50,   48,   99,   61,   43,
-       50,  110,   41,   67,   67,   67,   60,   63,   63,   63,
-       57,  155,   68,   69,   63,   37,   44,   66,   67,   67,
-       67,   63,   63,   63,   63,   73,   59,   68,   69,   70,
-       34,   34,   34,   43,   75,   38,  154,   92,   83,   83,
-       83,   64,   44,  120,   64,   64,   64,   67,   67,   67,
-
-       44,   57,   99,   68,   69,  107,   68,   69,  120,  127,
-      108,  153,  152,  121,   83,   83,   83,  133,  133,  133,
-      146,  133,  133,  133,  146,  140,  140,  140,  121,  141,
-      140,  140,  140,  151,  141,  158,  150,  149,  148,  144,
-      147,  143,  142,  139,  147,   36,   36,   36,   36,   36,
-       36,   36,   36,   40,  138,  137,  136,   40,   40,   42,
-       42,   42,   42,   42,   42,   42,   42,   56,   56,   56,
-       56,   62,  132,   62,   64,  131,  130,   64,  129,   64,
-       64,   65,  128,  158,   65,   65,   65,   65,   71,  127,
-       71,   71,   74,   74,   74,   74,   74,   74,   74,   74,
-
-       76,   76,   76,   76,   76,   76,   76,   76,   89,  126,
-       89,   90,  125,   90,   90,  124,   90,   90,  119,  119,
-      119,  119,  119,  119,  119,  119,  145,  145,  145,  145,
-      145,  145,  145,  145,  123,  122,   59,   59,  118,  117,
-      116,  115,  114,  113,   45,  112,  108,  111,  109,  106,
-      105,  104,   46,  103,   91,   87,  102,  101,  100,   98,
-       97,   96,   95,   94,   93,   77,   75,   91,   88,   87,
-       86,   57,   85,   84,   57,   82,   81,   78,   77,   75,
-       72,  158,   58,   57,   53,   35,  158,   31,   23,   23,
-        9,  158,  158,  158,  158,  158,  158,  158,  158,  158,
-
-      158,  158,  158,  158,  158,  158,  158,  158,  158,  158,
-      158,  158,  158,  158,  158,  158,  158,  158,  158,  158,
-      158,  158,  158,  158,  158,  158,  158,  158,  158,  158,
-      158,  158,  158,  158,  158,  158,  158
+       21,   21,   21,   21,   10,   22,   10,   24,   25,   25,
+       25,   32,   33,   33,  164,   26,   34,   34,   34,   52,
+       53,   27,   26,   26,   26,   26,   10,   11,   12,   11,
+       13,   14,   28,   15,   16,   28,   28,   28,   24,   28,
+       28,   28,   10,   18,   19,   20,   29,   29,   29,   29,
+       29,   30,   10,   29,   29,   29,   29,   29,   29,   29,
+
+       29,   29,   29,   29,   29,   29,   29,   29,   29,   29,
+       10,   22,   10,   23,   34,   34,   34,   37,   39,   43,
+       32,   33,   33,   45,   55,   56,   46,   60,   43,   45,
+       65,  163,   46,   65,   65,   65,   44,   38,   60,   74,
+       58,   47,  141,   48,  142,   44,   49,   47,   50,   48,
+       76,   51,   62,   94,   50,   41,   44,   51,   37,   61,
+       64,   64,   64,   58,   34,   34,   34,   64,  162,   80,
+       67,   68,   68,   68,   64,   64,   64,   64,   38,   81,
+       69,   70,   71,   68,   68,   68,   60,  161,   43,   69,
+       70,   65,   69,   70,   65,   65,   65,  125,   85,   85,
+
+       85,   58,   68,   68,   68,   44,  102,  110,  125,  133,
+      102,   69,   70,  111,  114,  160,  159,  126,   85,   85,
+       85,  140,  140,  140,  140,  140,  140,  153,  126,  147,
+      147,  147,  153,  148,  147,  147,  147,  158,  148,  165,
+      157,  156,  155,  151,  150,  149,  146,  154,  145,  144,
+      143,  139,  154,   36,   36,   36,   36,   36,   36,   36,
+       36,   40,  138,  137,  136,   40,   40,   42,   42,   42,
+       42,   42,   42,   42,   42,   57,   57,   57,   57,   63,
+      135,   63,   65,  134,  165,   65,  133,   65,   65,   66,
+      132,  131,   66,   66,   66,   66,   72,  130,   72,   72,
+
+       75,   75,   75,   75,   75,   75,   75,   75,   77,   77,
+       77,   77,   77,   77,   77,   77,   91,  129,   91,   92,
+      128,   92,   92,  127,   92,   92,  124,  124,  124,  124,
+      124,  124,  124,  124,  152,  152,  152,  152,  152,  152,
+      152,  152,   60,   60,  123,  122,  121,  120,  119,  118,
+      117,   45,  116,  111,  115,  113,  112,  109,  108,  107,
+       46,  106,   93,   89,  105,  104,  103,  101,  100,   99,
+       98,   97,   96,   95,   78,   76,   93,   90,   89,   88,
+       58,   87,   86,   58,   84,   83,   82,   79,   78,   76,
+       73,  165,   59,   58,   54,   35,  165,   31,   23,   23,
+
+        9,  165,  165,  165,  165,  165,  165,  165,  165,  165,
+      165,  165,  165,  165,  165,  165,  165,  165,  165,  165,
+      165,  165,  165,  165,  165,  165,  165,  165,  165,  165,
+      165,  165,  165,  165,  165,  165,  165,  165,  165,  165,
+      165,  165,  165,  165,  165,  165,  165,  165
     } ;
 
-static yyconst flex_int16_t yy_chk[438] =
+static yyconst flex_int16_t yy_chk[449] =
     {   0,
         1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
         1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
         1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
         1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
-        1,    1,    1,    1,    1,    1,    3,    3,    3,    3,
-        7,    7,    7,  156,    3,   11,   11,   11,   18,   18,
-        3,    3,    3,    3,    3,    5,    5,    5,    5,    5,
+        1,    1,    1,    1,    1,    1,    1,    3,    3,    3,
+        3,    7,    7,    7,  163,    3,   11,   11,   11,   18,
+       18,    3,    3,    3,    3,    3,    5,    5,    5,    5,
         5,    5,    5,    5,    5,    5,    5,    5,    5,    5,
         5,    5,    5,    5,    5,    5,    5,    5,    5,    5,
         5,    5,    5,    5,    5,    5,    5,    5,    5,    5,
 
         5,    5,    5,    5,    5,    5,    5,    5,    5,    5,
-        5,    8,   12,   12,   12,   14,   15,   16,    8,    8,
-        8,   17,   20,   20,   17,   23,   24,   29,  155,   24,
-       29,   29,   29,   48,   16,   14,   31,   29,  128,   17,
-      128,   17,   48,   17,   24,   17,   24,   99,   24,   42,
-       24,   99,   15,   33,   33,   33,   23,   26,   26,   26,
-       26,  154,   33,   33,   26,   36,   42,   31,   32,   32,
-       32,   26,   26,   26,   26,   44,   59,   32,   32,   32,
-       34,   34,   34,   73,   75,   36,  153,   75,   59,   59,
-       59,   65,   44,  110,   65,   65,   65,   67,   67,   67,
-
-       73,   65,   83,   89,   89,   97,   67,   67,  119,  127,
-       97,  150,  149,  110,   83,   83,   83,  133,  133,  133,
-      141,  127,  127,  127,  145,  136,  136,  136,  119,  136,
-      140,  140,  140,  148,  140,  147,  144,  143,  142,  139,
-      141,  138,  137,  135,  145,  159,  159,  159,  159,  159,
-      159,  159,  159,  160,  134,  132,  131,  160,  160,  161,
-      161,  161,  161,  161,  161,  161,  161,  162,  162,  162,
-      162,  163,  126,  163,  164,  125,  124,  164,  123,  164,
-      164,  165,  122,  121,  165,  165,  165,  165,  166,  120,
-      166,  166,  167,  167,  167,  167,  167,  167,  167,  167,
-
-      168,  168,  168,  168,  168,  168,  168,  168,  169,  118,
-      169,  170,  117,  170,  170,  116,  170,  170,  171,  171,
-      171,  171,  171,  171,  171,  171,  172,  172,  172,  172,
-      172,  172,  172,  172,  115,  114,  112,  111,  109,  108,
-      107,  106,  105,  104,  103,  102,  101,  100,   98,   96,
-       95,   94,   93,   92,   90,   88,   86,   85,   84,   82,
-       81,   80,   79,   78,   77,   76,   74,   72,   69,   68,
-       66,   63,   61,   60,   56,   50,   49,   47,   46,   45,
+        5,    5,    5,    8,   12,   12,   12,   14,   15,   16,
+        8,    8,    8,   17,   20,   20,   17,   23,   42,   24,
+       29,  162,   24,   29,   29,   29,   16,   14,   31,   44,
+       29,   17,  134,   17,  134,   42,   17,   24,   17,   24,
+       76,   17,   24,   76,   24,   15,   44,   24,   36,   23,
+       26,   26,   26,   26,   34,   34,   34,   26,  161,   48,
+       31,   32,   32,   32,   26,   26,   26,   26,   36,   48,
+       32,   32,   32,   33,   33,   33,   60,  160,   74,   91,
+       91,   66,   33,   33,   66,   66,   66,  114,   60,   60,
+
+       60,   66,   68,   68,   68,   74,   85,   99,  124,  133,
+      102,   68,   68,   99,  102,  157,  156,  114,   85,   85,
+       85,  133,  133,  133,  140,  140,  140,  148,  124,  143,
+      143,  143,  152,  143,  147,  147,  147,  155,  147,  154,
+      151,  150,  149,  146,  145,  144,  142,  148,  141,  138,
+      137,  132,  152,  166,  166,  166,  166,  166,  166,  166,
+      166,  167,  131,  130,  129,  167,  167,  168,  168,  168,
+      168,  168,  168,  168,  168,  169,  169,  169,  169,  170,
+      128,  170,  171,  127,  126,  171,  125,  171,  171,  172,
+      123,  122,  172,  172,  172,  172,  173,  121,  173,  173,
+
+      174,  174,  174,  174,  174,  174,  174,  174,  175,  175,
+      175,  175,  175,  175,  175,  175,  176,  120,  176,  177,
+      119,  177,  177,  118,  177,  177,  178,  178,  178,  178,
+      178,  178,  178,  178,  179,  179,  179,  179,  179,  179,
+      179,  179,  116,  115,  113,  112,  111,  110,  109,  108,
+      107,  106,  105,  104,  103,  101,  100,   98,   97,   96,
+       95,   94,   92,   90,   88,   87,   86,   84,   83,   82,
+       81,   80,   79,   78,   77,   75,   73,   70,   69,   67,
+       64,   62,   61,   57,   51,   50,   49,   47,   46,   45,
        41,   38,   22,   21,   19,   13,    9,    6,    4,    2,
-      158,  158,  158,  158,  158,  158,  158,  158,  158,  158,
 
-      158,  158,  158,  158,  158,  158,  158,  158,  158,  158,
-      158,  158,  158,  158,  158,  158,  158,  158,  158,  158,
-      158,  158,  158,  158,  158,  158,  158,  158,  158,  158,
-      158,  158,  158,  158,  158,  158,  158
+      165,  165,  165,  165,  165,  165,  165,  165,  165,  165,
+      165,  165,  165,  165,  165,  165,  165,  165,  165,  165,
+      165,  165,  165,  165,  165,  165,  165,  165,  165,  165,
+      165,  165,  165,  165,  165,  165,  165,  165,  165,  165,
+      165,  165,  165,  165,  165,  165,  165,  165
     } ;
 
 static yy_state_type yy_last_accepting_state;
@@ -661,8 +655,9 @@ static int dts_version = 1;
 
 static void push_input_file(const char *filename);
 static bool pop_input_file(void);
-static void lexical_error(const char *fmt, ...);
-#line 666 "dtc-lexer.lex.c"
+static void PRINTF(1, 2) lexical_error(const char *fmt, ...);
+
+#line 661 "dtc-lexer.lex.c"
 
 #define INITIAL 0
 #define BYTESTRING 1
@@ -698,19 +693,19 @@ void yyset_extra (YY_EXTRA_TYPE user_defined  );
 
 FILE *yyget_in (void );
 
-void yyset_in  (FILE * in_str  );
+void yyset_in  (FILE * _in_str  );
 
 FILE *yyget_out (void );
 
-void yyset_out  (FILE * out_str  );
+void yyset_out  (FILE * _out_str  );
 
-yy_size_t yyget_leng (void );
+			int yyget_leng (void );
 
 char *yyget_text (void );
 
 int yyget_lineno (void );
 
-void yyset_lineno (int line_number  );
+void yyset_lineno (int _line_number  );
 
 /* Macros after this point can all be overridden by user definitions in
  * section 1.
@@ -724,6 +719,10 @@ extern int yywrap (void );
 #endif
 #endif
 
+#ifndef YY_NO_UNPUT
+
+#endif
+
 #ifndef yytext_ptr
 static void yy_flex_strncpy (char *,yyconst char *,int );
 #endif
@@ -757,7 +756,7 @@ static int input (void );
 /* This used to be an fputs(), but since the string might contain NUL's,
  * we now use fwrite().
  */
-#define ECHO do { if (fwrite( yytext, yyleng, 1, yyout )) {} } while (0)
+#define ECHO do { if (fwrite( yytext, (size_t) yyleng, 1, yyout )) {} } while (0)
 #endif
 
 /* Gets input and stuffs it into "buf".  number of characters read, or YY_NULL,
@@ -781,7 +780,7 @@ static int input (void );
 	else \
 		{ \
 		errno=0; \
-		while ( (result = fread(buf, 1, max_size, yyin))==0 && ferror(yyin)) \
+		while ( (result = (int) fread(buf, 1, max_size, yyin))==0 && ferror(yyin)) \
 			{ \
 			if( errno != EINTR) \
 				{ \
@@ -836,7 +835,7 @@ extern int yylex (void);
 
 /* Code executed at the end of each rule. */
 #ifndef YY_BREAK
-#define YY_BREAK break;
+#define YY_BREAK /*LINTED*/break;
 #endif
 
 #define YY_RULE_SETUP \
@@ -849,9 +848,9 @@ extern int yylex (void);
  */
 YY_DECL
 {
-	register yy_state_type yy_current_state;
-	register char *yy_cp, *yy_bp;
-	register int yy_act;
+	yy_state_type yy_current_state;
+	char *yy_cp, *yy_bp;
+	int yy_act;
     
 	if ( !(yy_init) )
 		{
@@ -880,11 +879,11 @@ YY_DECL
 		}
 
 	{
-#line 68 "dtc-lexer.l"
+#line 69 "dtc-lexer.l"
 
-#line 886 "dtc-lexer.lex.c"
+#line 885 "dtc-lexer.lex.c"
 
-	while ( 1 )		/* loops until end-of-file is reached */
+	while ( /*CONSTCOND*/1 )		/* loops until end-of-file is reached */
 		{
 		yy_cp = (yy_c_buf_p);
 
@@ -901,7 +900,7 @@ YY_DECL
 yy_match:
 		do
 			{
-			register YY_CHAR yy_c = yy_ec[YY_SC_TO_UI(*yy_cp)] ;
+			YY_CHAR yy_c = yy_ec[YY_SC_TO_UI(*yy_cp)] ;
 			if ( yy_accept[yy_current_state] )
 				{
 				(yy_last_accepting_state) = yy_current_state;
@@ -910,13 +909,13 @@ yy_match:
 			while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
 				{
 				yy_current_state = (int) yy_def[yy_current_state];
-				if ( yy_current_state >= 159 )
+				if ( yy_current_state >= 166 )
 					yy_c = yy_meta[(unsigned int) yy_c];
 				}
-			yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
+			yy_current_state = yy_nxt[yy_base[yy_current_state] + (flex_int16_t) yy_c];
 			++yy_cp;
 			}
-		while ( yy_current_state != 158 );
+		while ( yy_current_state != 165 );
 		yy_cp = (yy_last_accepting_cpos);
 		yy_current_state = (yy_last_accepting_state);
 
@@ -939,7 +938,7 @@ do_action:	/* This label is used only to access EOF actions. */
 case 1:
 /* rule 1 can match eol */
 YY_RULE_SETUP
-#line 69 "dtc-lexer.l"
+#line 70 "dtc-lexer.l"
 {
 			char *name = strchr(yytext, '\"') + 1;
 			yytext[yyleng-1] = '\0';
@@ -949,7 +948,7 @@ YY_RULE_SETUP
 case 2:
 /* rule 2 can match eol */
 YY_RULE_SETUP
-#line 75 "dtc-lexer.l"
+#line 76 "dtc-lexer.l"
 {
 			char *line, *fnstart, *fnend;
 			struct data fn;
@@ -983,7 +982,7 @@ case YY_STATE_EOF(INITIAL):
 case YY_STATE_EOF(BYTESTRING):
 case YY_STATE_EOF(PROPNODENAME):
 case YY_STATE_EOF(V1):
-#line 104 "dtc-lexer.l"
+#line 105 "dtc-lexer.l"
 {
 			if (!pop_input_file()) {
 				yyterminate();
@@ -993,7 +992,7 @@ case YY_STATE_EOF(V1):
 case 3:
 /* rule 3 can match eol */
 YY_RULE_SETUP
-#line 110 "dtc-lexer.l"
+#line 111 "dtc-lexer.l"
 {
 			DPRINT("String: %s\n", yytext);
 			yylval.data = data_copy_escape_string(yytext+1,
@@ -1003,7 +1002,7 @@ YY_RULE_SETUP
 	YY_BREAK
 case 4:
 YY_RULE_SETUP
-#line 117 "dtc-lexer.l"
+#line 118 "dtc-lexer.l"
 {
 			DPRINT("Keyword: /dts-v1/\n");
 			dts_version = 1;
@@ -1013,25 +1012,33 @@ YY_RULE_SETUP
 	YY_BREAK
 case 5:
 YY_RULE_SETUP
-#line 124 "dtc-lexer.l"
+#line 125 "dtc-lexer.l"
+{
+			DPRINT("Keyword: /plugin/\n");
+			return DT_PLUGIN;
+		}
+	YY_BREAK
+case 6:
+YY_RULE_SETUP
+#line 130 "dtc-lexer.l"
 {
 			DPRINT("Keyword: /memreserve/\n");
 			BEGIN_DEFAULT();
 			return DT_MEMRESERVE;
 		}
 	YY_BREAK
-case 6:
+case 7:
 YY_RULE_SETUP
-#line 130 "dtc-lexer.l"
+#line 136 "dtc-lexer.l"
 {
 			DPRINT("Keyword: /bits/\n");
 			BEGIN_DEFAULT();
 			return DT_BITS;
 		}
 	YY_BREAK
-case 7:
+case 8:
 YY_RULE_SETUP
-#line 136 "dtc-lexer.l"
+#line 142 "dtc-lexer.l"
 {
 			DPRINT("Keyword: /delete-property/\n");
 			DPRINT("<PROPNODENAME>\n");
@@ -1039,9 +1046,9 @@ YY_RULE_SETUP
 			return DT_DEL_PROP;
 		}
 	YY_BREAK
-case 8:
+case 9:
 YY_RULE_SETUP
-#line 143 "dtc-lexer.l"
+#line 149 "dtc-lexer.l"
 {
 			DPRINT("Keyword: /delete-node/\n");
 			DPRINT("<PROPNODENAME>\n");
@@ -1049,9 +1056,9 @@ YY_RULE_SETUP
 			return DT_DEL_NODE;
 		}
 	YY_BREAK
-case 9:
+case 10:
 YY_RULE_SETUP
-#line 150 "dtc-lexer.l"
+#line 156 "dtc-lexer.l"
 {
 			DPRINT("Label: %s\n", yytext);
 			yylval.labelref = xstrdup(yytext);
@@ -1059,9 +1066,9 @@ YY_RULE_SETUP
 			return DT_LABEL;
 		}
 	YY_BREAK
-case 10:
+case 11:
 YY_RULE_SETUP
-#line 157 "dtc-lexer.l"
+#line 163 "dtc-lexer.l"
 {
 			char *e;
 			DPRINT("Integer Literal: '%s'\n", yytext);
@@ -1084,10 +1091,10 @@ YY_RULE_SETUP
 			return DT_LITERAL;
 		}
 	YY_BREAK
-case 11:
-/* rule 11 can match eol */
+case 12:
+/* rule 12 can match eol */
 YY_RULE_SETUP
-#line 179 "dtc-lexer.l"
+#line 185 "dtc-lexer.l"
 {
 			struct data d;
 			DPRINT("Character literal: %s\n", yytext);
@@ -1096,31 +1103,31 @@ YY_RULE_SETUP
 			if (d.len == 1) {
 				lexical_error("Empty character literal");
 				yylval.integer = 0;
-				return DT_CHAR_LITERAL;
-			}
+			} else {
+				yylval.integer = (unsigned char)d.val[0];
 
-			yylval.integer = (unsigned char)d.val[0];
-
-			if (d.len > 2)
-				lexical_error("Character literal has %d"
-					      " characters instead of 1",
-					      d.len - 1);
+				if (d.len > 2)
+					lexical_error("Character literal has %d"
+						      " characters instead of 1",
+						      d.len - 1);
+			}
 
+			data_free(d);
 			return DT_CHAR_LITERAL;
 		}
 	YY_BREAK
-case 12:
+case 13:
 YY_RULE_SETUP
-#line 200 "dtc-lexer.l"
+#line 206 "dtc-lexer.l"
 {	/* label reference */
 			DPRINT("Ref: %s\n", yytext+1);
 			yylval.labelref = xstrdup(yytext+1);
 			return DT_REF;
 		}
 	YY_BREAK
-case 13:
+case 14:
 YY_RULE_SETUP
-#line 206 "dtc-lexer.l"
+#line 212 "dtc-lexer.l"
 {	/* new-style path reference */
 			yytext[yyleng-1] = '\0';
 			DPRINT("Ref: %s\n", yytext+2);
@@ -1128,27 +1135,27 @@ YY_RULE_SETUP
 			return DT_REF;
 		}
 	YY_BREAK
-case 14:
+case 15:
 YY_RULE_SETUP
-#line 213 "dtc-lexer.l"
+#line 219 "dtc-lexer.l"
 {
 			yylval.byte = strtol(yytext, NULL, 16);
 			DPRINT("Byte: %02x\n", (int)yylval.byte);
 			return DT_BYTE;
 		}
 	YY_BREAK
-case 15:
+case 16:
 YY_RULE_SETUP
-#line 219 "dtc-lexer.l"
+#line 225 "dtc-lexer.l"
 {
 			DPRINT("/BYTESTRING\n");
 			BEGIN_DEFAULT();
 			return ']';
 		}
 	YY_BREAK
-case 16:
+case 17:
 YY_RULE_SETUP
-#line 225 "dtc-lexer.l"
+#line 231 "dtc-lexer.l"
 {
 			DPRINT("PropNodeName: %s\n", yytext);
 			yylval.propnodename = xstrdup((yytext[0] == '\\') ?
@@ -1157,75 +1164,75 @@ YY_RULE_SETUP
 			return DT_PROPNODENAME;
 		}
 	YY_BREAK
-case 17:
+case 18:
 YY_RULE_SETUP
-#line 233 "dtc-lexer.l"
+#line 239 "dtc-lexer.l"
 {
 			DPRINT("Binary Include\n");
 			return DT_INCBIN;
 		}
 	YY_BREAK
-case 18:
-/* rule 18 can match eol */
-YY_RULE_SETUP
-#line 238 "dtc-lexer.l"
-/* eat whitespace */
-	YY_BREAK
 case 19:
 /* rule 19 can match eol */
 YY_RULE_SETUP
-#line 239 "dtc-lexer.l"
-/* eat C-style comments */
+#line 244 "dtc-lexer.l"
+/* eat whitespace */
 	YY_BREAK
 case 20:
 /* rule 20 can match eol */
 YY_RULE_SETUP
-#line 240 "dtc-lexer.l"
-/* eat C++-style comments */
+#line 245 "dtc-lexer.l"
+/* eat C-style comments */
 	YY_BREAK
 case 21:
+/* rule 21 can match eol */
 YY_RULE_SETUP
-#line 242 "dtc-lexer.l"
-{ return DT_LSHIFT; };
+#line 246 "dtc-lexer.l"
+/* eat C++-style comments */
 	YY_BREAK
 case 22:
 YY_RULE_SETUP
-#line 243 "dtc-lexer.l"
-{ return DT_RSHIFT; };
+#line 248 "dtc-lexer.l"
+{ return DT_LSHIFT; };
 	YY_BREAK
 case 23:
 YY_RULE_SETUP
-#line 244 "dtc-lexer.l"
-{ return DT_LE; };
+#line 249 "dtc-lexer.l"
+{ return DT_RSHIFT; };
 	YY_BREAK
 case 24:
 YY_RULE_SETUP
-#line 245 "dtc-lexer.l"
-{ return DT_GE; };
+#line 250 "dtc-lexer.l"
+{ return DT_LE; };
 	YY_BREAK
 case 25:
 YY_RULE_SETUP
-#line 246 "dtc-lexer.l"
-{ return DT_EQ; };
+#line 251 "dtc-lexer.l"
+{ return DT_GE; };
 	YY_BREAK
 case 26:
 YY_RULE_SETUP
-#line 247 "dtc-lexer.l"
-{ return DT_NE; };
+#line 252 "dtc-lexer.l"
+{ return DT_EQ; };
 	YY_BREAK
 case 27:
 YY_RULE_SETUP
-#line 248 "dtc-lexer.l"
-{ return DT_AND; };
+#line 253 "dtc-lexer.l"
+{ return DT_NE; };
 	YY_BREAK
 case 28:
 YY_RULE_SETUP
-#line 249 "dtc-lexer.l"
-{ return DT_OR; };
+#line 254 "dtc-lexer.l"
+{ return DT_AND; };
 	YY_BREAK
 case 29:
 YY_RULE_SETUP
-#line 251 "dtc-lexer.l"
+#line 255 "dtc-lexer.l"
+{ return DT_OR; };
+	YY_BREAK
+case 30:
+YY_RULE_SETUP
+#line 257 "dtc-lexer.l"
 {
 			DPRINT("Char: %c (\\x%02x)\n", yytext[0],
 				(unsigned)yytext[0]);
@@ -1241,12 +1248,12 @@ YY_RULE_SETUP
 			return yytext[0];
 		}
 	YY_BREAK
-case 30:
+case 31:
 YY_RULE_SETUP
-#line 266 "dtc-lexer.l"
+#line 272 "dtc-lexer.l"
 ECHO;
 	YY_BREAK
-#line 1250 "dtc-lexer.lex.c"
+#line 1257 "dtc-lexer.lex.c"
 
 	case YY_END_OF_BUFFER:
 		{
@@ -1388,9 +1395,9 @@ ECHO;
  */
 static int yy_get_next_buffer (void)
 {
-    	register char *dest = YY_CURRENT_BUFFER_LVALUE->yy_ch_buf;
-	register char *source = (yytext_ptr);
-	register int number_to_move, i;
+	char *dest = YY_CURRENT_BUFFER_LVALUE->yy_ch_buf;
+	char *source = (yytext_ptr);
+	int number_to_move, i;
 	int ret_val;
 
 	if ( (yy_c_buf_p) > &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars) + 1] )
@@ -1419,7 +1426,7 @@ static int yy_get_next_buffer (void)
 	/* Try to read more data. */
 
 	/* First move last chars to start of buffer. */
-	number_to_move = (int) ((yy_c_buf_p) - (yytext_ptr)) - 1;
+	number_to_move = (int) ((yy_c_buf_p) - (yytext_ptr) - 1);
 
 	for ( i = 0; i < number_to_move; ++i )
 		*(dest++) = *(source++);
@@ -1432,7 +1439,7 @@ static int yy_get_next_buffer (void)
 
 	else
 		{
-			yy_size_t num_to_read =
+			int num_to_read =
 			YY_CURRENT_BUFFER_LVALUE->yy_buf_size - number_to_move - 1;
 
 		while ( num_to_read <= 0 )
@@ -1446,7 +1453,7 @@ static int yy_get_next_buffer (void)
 
 			if ( b->yy_is_our_buffer )
 				{
-				yy_size_t new_size = b->yy_buf_size * 2;
+				int new_size = b->yy_buf_size * 2;
 
 				if ( new_size <= 0 )
 					b->yy_buf_size += b->yy_buf_size / 8;
@@ -1459,7 +1466,7 @@ static int yy_get_next_buffer (void)
 				}
 			else
 				/* Can't grow it, we don't own it. */
-				b->yy_ch_buf = 0;
+				b->yy_ch_buf = NULL;
 
 			if ( ! b->yy_ch_buf )
 				YY_FATAL_ERROR(
@@ -1501,9 +1508,9 @@ static int yy_get_next_buffer (void)
 	else
 		ret_val = EOB_ACT_CONTINUE_SCAN;
 
-	if ((yy_size_t) ((yy_n_chars) + number_to_move) > YY_CURRENT_BUFFER_LVALUE->yy_buf_size) {
+	if (((yy_n_chars) + number_to_move) > YY_CURRENT_BUFFER_LVALUE->yy_buf_size) {
 		/* Extend the array by 50%, plus the number we really need. */
-		yy_size_t new_size = (yy_n_chars) + number_to_move + ((yy_n_chars) >> 1);
+		int new_size = (yy_n_chars) + number_to_move + ((yy_n_chars) >> 1);
 		YY_CURRENT_BUFFER_LVALUE->yy_ch_buf = (char *) yyrealloc((void *) YY_CURRENT_BUFFER_LVALUE->yy_ch_buf,new_size  );
 		if ( ! YY_CURRENT_BUFFER_LVALUE->yy_ch_buf )
 			YY_FATAL_ERROR( "out of dynamic memory in yy_get_next_buffer()" );
@@ -1522,15 +1529,15 @@ static int yy_get_next_buffer (void)
 
     static yy_state_type yy_get_previous_state (void)
 {
-	register yy_state_type yy_current_state;
-	register char *yy_cp;
+	yy_state_type yy_current_state;
+	char *yy_cp;
     
 	yy_current_state = (yy_start);
 	yy_current_state += YY_AT_BOL();
 
 	for ( yy_cp = (yytext_ptr) + YY_MORE_ADJ; yy_cp < (yy_c_buf_p); ++yy_cp )
 		{
-		register YY_CHAR yy_c = (*yy_cp ? yy_ec[YY_SC_TO_UI(*yy_cp)] : 1);
+		YY_CHAR yy_c = (*yy_cp ? yy_ec[YY_SC_TO_UI(*yy_cp)] : 1);
 		if ( yy_accept[yy_current_state] )
 			{
 			(yy_last_accepting_state) = yy_current_state;
@@ -1539,10 +1546,10 @@ static int yy_get_next_buffer (void)
 		while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
 			{
 			yy_current_state = (int) yy_def[yy_current_state];
-			if ( yy_current_state >= 159 )
+			if ( yy_current_state >= 166 )
 				yy_c = yy_meta[(unsigned int) yy_c];
 			}
-		yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
+		yy_current_state = yy_nxt[yy_base[yy_current_state] + (flex_int16_t) yy_c];
 		}
 
 	return yy_current_state;
@@ -1555,10 +1562,10 @@ static int yy_get_next_buffer (void)
  */
     static yy_state_type yy_try_NUL_trans  (yy_state_type yy_current_state )
 {
-	register int yy_is_jam;
-    	register char *yy_cp = (yy_c_buf_p);
+	int yy_is_jam;
+	char *yy_cp = (yy_c_buf_p);
 
-	register YY_CHAR yy_c = 1;
+	YY_CHAR yy_c = 1;
 	if ( yy_accept[yy_current_state] )
 		{
 		(yy_last_accepting_state) = yy_current_state;
@@ -1567,15 +1574,19 @@ static int yy_get_next_buffer (void)
 	while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
 		{
 		yy_current_state = (int) yy_def[yy_current_state];
-		if ( yy_current_state >= 159 )
+		if ( yy_current_state >= 166 )
 			yy_c = yy_meta[(unsigned int) yy_c];
 		}
-	yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
-	yy_is_jam = (yy_current_state == 158);
+	yy_current_state = yy_nxt[yy_base[yy_current_state] + (flex_int16_t) yy_c];
+	yy_is_jam = (yy_current_state == 165);
 
 		return yy_is_jam ? 0 : yy_current_state;
 }
 
+#ifndef YY_NO_UNPUT
+
+#endif
+
 #ifndef YY_NO_INPUT
 #ifdef __cplusplus
     static int yyinput (void)
@@ -1600,7 +1611,7 @@ static int yy_get_next_buffer (void)
 
 		else
 			{ /* need more input */
-			yy_size_t offset = (yy_c_buf_p) - (yytext_ptr);
+			int offset = (yy_c_buf_p) - (yytext_ptr);
 			++(yy_c_buf_p);
 
 			switch ( yy_get_next_buffer(  ) )
@@ -1624,7 +1635,7 @@ static int yy_get_next_buffer (void)
 				case EOB_ACT_END_OF_FILE:
 					{
 					if ( yywrap( ) )
-						return EOF;
+						return 0;
 
 					if ( ! (yy_did_buffer_switch_on_eof) )
 						YY_NEW_FILE;
@@ -1727,7 +1738,7 @@ static void yy_load_buffer_state  (void)
 	if ( ! b )
 		YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" );
 
-	b->yy_buf_size = size;
+	b->yy_buf_size = (yy_size_t)size;
 
 	/* yy_ch_buf has to be 2 characters longer than the size given because
 	 * we need to put in 2 end-of-buffer characters.
@@ -1874,7 +1885,7 @@ void yypop_buffer_state (void)
  */
 static void yyensure_buffer_stack (void)
 {
-	yy_size_t num_to_alloc;
+	int num_to_alloc;
     
 	if (!(yy_buffer_stack)) {
 
@@ -1882,15 +1893,15 @@ static void yyensure_buffer_stack (void)
 		 * scanner will even need a stack. We use 2 instead of 1 to avoid an
 		 * immediate realloc on the next call.
          */
-		num_to_alloc = 1;
+      num_to_alloc = 1; /* After all that talk, this was set to 1 anyways... */
 		(yy_buffer_stack) = (struct yy_buffer_state**)yyalloc
 								(num_to_alloc * sizeof(struct yy_buffer_state*)
 								);
 		if ( ! (yy_buffer_stack) )
 			YY_FATAL_ERROR( "out of dynamic memory in yyensure_buffer_stack()" );
-								  
+
 		memset((yy_buffer_stack), 0, num_to_alloc * sizeof(struct yy_buffer_state*));
-				
+
 		(yy_buffer_stack_max) = num_to_alloc;
 		(yy_buffer_stack_top) = 0;
 		return;
@@ -1899,7 +1910,7 @@ static void yyensure_buffer_stack (void)
 	if ((yy_buffer_stack_top) >= ((yy_buffer_stack_max)) - 1){
 
 		/* Increase the buffer to prepare for a possible push. */
-		int grow_size = 8 /* arbitrary grow size */;
+		yy_size_t grow_size = 8 /* arbitrary grow size */;
 
 		num_to_alloc = (yy_buffer_stack_max) + grow_size;
 		(yy_buffer_stack) = (struct yy_buffer_state**)yyrealloc
@@ -1919,7 +1930,7 @@ static void yyensure_buffer_stack (void)
  * @param base the character buffer
  * @param size the size in bytes of the character buffer
  * 
- * @return the newly allocated buffer state object. 
+ * @return the newly allocated buffer state object.
  */
 YY_BUFFER_STATE yy_scan_buffer  (char * base, yy_size_t  size )
 {
@@ -1929,7 +1940,7 @@ YY_BUFFER_STATE yy_scan_buffer  (char * base, yy_size_t  size )
 	     base[size-2] != YY_END_OF_BUFFER_CHAR ||
 	     base[size-1] != YY_END_OF_BUFFER_CHAR )
 		/* They forgot to leave room for the EOB's. */
-		return 0;
+		return NULL;
 
 	b = (YY_BUFFER_STATE) yyalloc(sizeof( struct yy_buffer_state )  );
 	if ( ! b )
@@ -1938,7 +1949,7 @@ YY_BUFFER_STATE yy_scan_buffer  (char * base, yy_size_t  size )
 	b->yy_buf_size = size - 2;	/* "- 2" to take care of EOB's */
 	b->yy_buf_pos = b->yy_ch_buf = base;
 	b->yy_is_our_buffer = 0;
-	b->yy_input_file = 0;
+	b->yy_input_file = NULL;
 	b->yy_n_chars = b->yy_buf_size;
 	b->yy_is_interactive = 0;
 	b->yy_at_bol = 1;
@@ -1961,7 +1972,7 @@ YY_BUFFER_STATE yy_scan_buffer  (char * base, yy_size_t  size )
 YY_BUFFER_STATE yy_scan_string (yyconst char * yystr )
 {
     
-	return yy_scan_bytes(yystr,strlen(yystr) );
+	return yy_scan_bytes(yystr,(int) strlen(yystr) );
 }
 
 /** Setup the input buffer state to scan the given bytes. The next call to yylex() will
@@ -1971,15 +1982,15 @@ YY_BUFFER_STATE yy_scan_string (yyconst char * yystr )
  * 
  * @return the newly allocated buffer state object.
  */
-YY_BUFFER_STATE yy_scan_bytes  (yyconst char * yybytes, yy_size_t  _yybytes_len )
+YY_BUFFER_STATE yy_scan_bytes  (yyconst char * yybytes, int  _yybytes_len )
 {
 	YY_BUFFER_STATE b;
 	char *buf;
 	yy_size_t n;
-	yy_size_t i;
+	int i;
     
 	/* Get memory for full buffer, including space for trailing EOB's. */
-	n = _yybytes_len + 2;
+	n = (yy_size_t) (_yybytes_len + 2);
 	buf = (char *) yyalloc(n  );
 	if ( ! buf )
 		YY_FATAL_ERROR( "out of dynamic memory in yy_scan_bytes()" );
@@ -2005,9 +2016,9 @@ YY_BUFFER_STATE yy_scan_bytes  (yyconst char * yybytes, yy_size_t  _yybytes_len
 #define YY_EXIT_FAILURE 2
 #endif
 
-static void yy_fatal_error (yyconst char* msg )
+static void yynoreturn yy_fatal_error (yyconst char* msg )
 {
-    	(void) fprintf( stderr, "%s\n", msg );
+			(void) fprintf( stderr, "%s\n", msg );
 	exit( YY_EXIT_FAILURE );
 }
 
@@ -2035,7 +2046,7 @@ static void yy_fatal_error (yyconst char* msg )
  */
 int yyget_lineno  (void)
 {
-        
+
     return yylineno;
 }
 
@@ -2058,7 +2069,7 @@ FILE *yyget_out  (void)
 /** Get the length of the current token.
  * 
  */
-yy_size_t yyget_leng  (void)
+int yyget_leng  (void)
 {
         return yyleng;
 }
@@ -2073,29 +2084,29 @@ char *yyget_text  (void)
 }
 
 /** Set the current line number.
- * @param line_number
+ * @param _line_number line number
  * 
  */
-void yyset_lineno (int  line_number )
+void yyset_lineno (int  _line_number )
 {
     
-    yylineno = line_number;
+    yylineno = _line_number;
 }
 
 /** Set the input stream. This does not discard the current
  * input buffer.
- * @param in_str A readable stream.
+ * @param _in_str A readable stream.
  * 
  * @see yy_switch_to_buffer
  */
-void yyset_in (FILE *  in_str )
+void yyset_in (FILE *  _in_str )
 {
-        yyin = in_str ;
+        yyin = _in_str ;
 }
 
-void yyset_out (FILE *  out_str )
+void yyset_out (FILE *  _out_str )
 {
-        yyout = out_str ;
+        yyout = _out_str ;
 }
 
 int yyget_debug  (void)
@@ -2103,9 +2114,9 @@ int yyget_debug  (void)
         return yy_flex_debug;
 }
 
-void yyset_debug (int  bdebug )
+void yyset_debug (int  _bdebug )
 {
-        yy_flex_debug = bdebug ;
+        yy_flex_debug = _bdebug ;
 }
 
 static int yy_init_globals (void)
@@ -2114,10 +2125,10 @@ static int yy_init_globals (void)
      * This function is called from yylex_destroy(), so don't allocate here.
      */
 
-    (yy_buffer_stack) = 0;
+    (yy_buffer_stack) = NULL;
     (yy_buffer_stack_top) = 0;
     (yy_buffer_stack_max) = 0;
-    (yy_c_buf_p) = (char *) 0;
+    (yy_c_buf_p) = NULL;
     (yy_init) = 0;
     (yy_start) = 0;
 
@@ -2126,8 +2137,8 @@ static int yy_init_globals (void)
     yyin = stdin;
     yyout = stdout;
 #else
-    yyin = (FILE *) 0;
-    yyout = (FILE *) 0;
+    yyin = NULL;
+    yyout = NULL;
 #endif
 
     /* For future reference: Set errno on error, since we are called by
@@ -2165,7 +2176,8 @@ int yylex_destroy  (void)
 #ifndef yytext_ptr
 static void yy_flex_strncpy (char* s1, yyconst char * s2, int n )
 {
-	register int i;
+
+	int i;
 	for ( i = 0; i < n; ++i )
 		s1[i] = s2[i];
 }
@@ -2174,7 +2186,7 @@ static void yy_flex_strncpy (char* s1, yyconst char * s2, int n )
 #ifdef YY_NEED_STRLEN
 static int yy_flex_strlen (yyconst char * s )
 {
-	register int n;
+	int n;
 	for ( n = 0; s[n]; ++n )
 		;
 
@@ -2184,11 +2196,12 @@ static int yy_flex_strlen (yyconst char * s )
 
 void *yyalloc (yy_size_t  size )
 {
-	return (void *) malloc( size );
+			return malloc(size);
 }
 
 void *yyrealloc  (void * ptr, yy_size_t  size )
 {
+
 	/* The cast to (char *) in the following accommodates both
 	 * implementations that use char* generic pointers, and those
 	 * that use void* generic pointers.  It works with the latter
@@ -2196,17 +2209,17 @@ void *yyrealloc  (void * ptr, yy_size_t  size )
 	 * any pointer type to void*, and deal with argument conversions
 	 * as though doing an assignment.
 	 */
-	return (void *) realloc( (char *) ptr, size );
+	return realloc(ptr, size);
 }
 
 void yyfree (void * ptr )
 {
-	free( (char *) ptr );	/* see yyrealloc() for (char *) cast */
+			free( (char *) ptr );	/* see yyrealloc() for (char *) cast */
 }
 
 #define YYTABLES_NAME "yytables"
 
-#line 265 "dtc-lexer.l"
+#line 272 "dtc-lexer.l"
 
 
 
diff --git a/scripts/dtc/dtc-parser.tab.c_shipped b/scripts/dtc/dtc-parser.tab.c_shipped
index 31cec50..0a7a5ed 100644
--- a/scripts/dtc/dtc-parser.tab.c_shipped
+++ b/scripts/dtc/dtc-parser.tab.c_shipped
@@ -1,8 +1,8 @@
-/* A Bison parser, made by GNU Bison 3.0.2.  */
+/* A Bison parser, made by GNU Bison 3.0.4.  */
 
 /* Bison implementation for Yacc-like parsers in C
 
-   Copyright (C) 1984, 1989-1990, 2000-2013 Free Software Foundation, Inc.
+   Copyright (C) 1984, 1989-1990, 2000-2015 Free Software Foundation, Inc.
 
    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -44,7 +44,7 @@
 #define YYBISON 1
 
 /* Bison version.  */
-#define YYBISON_VERSION "3.0.2"
+#define YYBISON_VERSION "3.0.4"
 
 /* Skeleton name.  */
 #define YYSKELETON_NAME "yacc.c"
@@ -65,6 +65,7 @@
 #line 20 "dtc-parser.y" /* yacc.c:339  */
 
 #include <stdio.h>
+#include <inttypes.h>
 
 #include "dtc.h"
 #include "srcpos.h"
@@ -77,10 +78,10 @@ extern void yyerror(char const *s);
 		treesource_error = true; \
 	} while (0)
 
-extern struct boot_info *the_boot_info;
+extern struct dt_info *parser_output;
 extern bool treesource_error;
 
-#line 84 "dtc-parser.tab.c" /* yacc.c:339  */
+#line 85 "dtc-parser.tab.c" /* yacc.c:339  */
 
 # ifndef YY_NULLPTR
 #  if defined __cplusplus && 201103L <= __cplusplus
@@ -116,35 +117,36 @@ extern int yydebug;
   enum yytokentype
   {
     DT_V1 = 258,
-    DT_MEMRESERVE = 259,
-    DT_LSHIFT = 260,
-    DT_RSHIFT = 261,
-    DT_LE = 262,
-    DT_GE = 263,
-    DT_EQ = 264,
-    DT_NE = 265,
-    DT_AND = 266,
-    DT_OR = 267,
-    DT_BITS = 268,
-    DT_DEL_PROP = 269,
-    DT_DEL_NODE = 270,
-    DT_PROPNODENAME = 271,
-    DT_LITERAL = 272,
-    DT_CHAR_LITERAL = 273,
-    DT_BYTE = 274,
-    DT_STRING = 275,
-    DT_LABEL = 276,
-    DT_REF = 277,
-    DT_INCBIN = 278
+    DT_PLUGIN = 259,
+    DT_MEMRESERVE = 260,
+    DT_LSHIFT = 261,
+    DT_RSHIFT = 262,
+    DT_LE = 263,
+    DT_GE = 264,
+    DT_EQ = 265,
+    DT_NE = 266,
+    DT_AND = 267,
+    DT_OR = 268,
+    DT_BITS = 269,
+    DT_DEL_PROP = 270,
+    DT_DEL_NODE = 271,
+    DT_PROPNODENAME = 272,
+    DT_LITERAL = 273,
+    DT_CHAR_LITERAL = 274,
+    DT_BYTE = 275,
+    DT_STRING = 276,
+    DT_LABEL = 277,
+    DT_REF = 278,
+    DT_INCBIN = 279
   };
 #endif
 
 /* Value type.  */
 #if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED
-typedef union YYSTYPE YYSTYPE;
+
 union YYSTYPE
 {
-#line 38 "dtc-parser.y" /* yacc.c:355  */
+#line 39 "dtc-parser.y" /* yacc.c:355  */
 
 	char *propnodename;
 	char *labelref;
@@ -162,9 +164,12 @@ union YYSTYPE
 	struct node *nodelist;
 	struct reserve_info *re;
 	uint64_t integer;
+	unsigned int flags;
 
-#line 167 "dtc-parser.tab.c" /* yacc.c:355  */
+#line 170 "dtc-parser.tab.c" /* yacc.c:355  */
 };
+
+typedef union YYSTYPE YYSTYPE;
 # define YYSTYPE_IS_TRIVIAL 1
 # define YYSTYPE_IS_DECLARED 1
 #endif
@@ -192,7 +197,7 @@ int yyparse (void);
 
 /* Copy the second part of user declarations.  */
 
-#line 196 "dtc-parser.tab.c" /* yacc.c:358  */
+#line 201 "dtc-parser.tab.c" /* yacc.c:358  */
 
 #ifdef short
 # undef short
@@ -434,23 +439,23 @@ union yyalloc
 #endif /* !YYCOPY_NEEDED */
 
 /* YYFINAL -- State number of the termination state.  */
-#define YYFINAL  4
+#define YYFINAL  6
 /* YYLAST -- Last index in YYTABLE.  */
-#define YYLAST   136
+#define YYLAST   138
 
 /* YYNTOKENS -- Number of terminals.  */
-#define YYNTOKENS  47
+#define YYNTOKENS  48
 /* YYNNTS -- Number of nonterminals.  */
-#define YYNNTS  28
+#define YYNNTS  30
 /* YYNRULES -- Number of rules.  */
-#define YYNRULES  80
+#define YYNRULES  84
 /* YYNSTATES -- Number of states.  */
-#define YYNSTATES  144
+#define YYNSTATES  149
 
 /* YYTRANSLATE[YYX] -- Symbol number corresponding to YYX as returned
    by yylex, with out-of-bounds checking.  */
 #define YYUNDEFTOK  2
-#define YYMAXUTOK   278
+#define YYMAXUTOK   279
 
 #define YYTRANSLATE(YYX)                                                \
   ((unsigned int) (YYX) <= YYMAXUTOK ? yytranslate[YYX] : YYUNDEFTOK)
@@ -462,16 +467,16 @@ static const yytype_uint8 yytranslate[] =
        0,     2,     2,     2,     2,     2,     2,     2,     2,     2,
        2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
        2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
-       2,     2,     2,    46,     2,     2,     2,    44,    40,     2,
-      32,    34,    43,    41,    33,    42,     2,    25,     2,     2,
-       2,     2,     2,     2,     2,     2,     2,     2,    37,    24,
-      35,    28,    29,    36,     2,     2,     2,     2,     2,     2,
+       2,     2,     2,    47,     2,     2,     2,    45,    41,     2,
+      33,    35,    44,    42,    34,    43,     2,    26,     2,     2,
+       2,     2,     2,     2,     2,     2,     2,     2,    38,    25,
+      36,    29,    30,    37,     2,     2,     2,     2,     2,     2,
        2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
        2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
-       2,    30,     2,    31,    39,     2,     2,     2,     2,     2,
+       2,    31,     2,    32,    40,     2,     2,     2,     2,     2,
        2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
        2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
-       2,     2,     2,    26,    38,    27,    45,     2,     2,     2,
+       2,     2,     2,    27,    39,    28,    46,     2,     2,     2,
        2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
        2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
        2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
@@ -486,22 +491,22 @@ static const yytype_uint8 yytranslate[] =
        2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
        2,     2,     2,     2,     2,     2,     1,     2,     3,     4,
        5,     6,     7,     8,     9,    10,    11,    12,    13,    14,
-      15,    16,    17,    18,    19,    20,    21,    22,    23
+      15,    16,    17,    18,    19,    20,    21,    22,    23,    24
 };
 
 #if YYDEBUG
   /* YYRLINE[YYN] -- Source line where rule number YYN was defined.  */
 static const yytype_uint16 yyrline[] =
 {
-       0,   104,   104,   113,   116,   123,   127,   135,   139,   144,
-     155,   165,   180,   188,   191,   198,   202,   206,   210,   218,
-     222,   226,   230,   234,   250,   260,   268,   271,   275,   282,
-     298,   303,   322,   336,   343,   344,   345,   352,   356,   357,
-     361,   362,   366,   367,   371,   372,   376,   377,   381,   382,
-     386,   387,   388,   392,   393,   394,   395,   396,   400,   401,
-     402,   406,   407,   408,   412,   413,   422,   431,   435,   436,
-     437,   438,   443,   446,   450,   458,   461,   465,   473,   477,
-     481
+       0,   109,   109,   117,   121,   128,   129,   139,   142,   149,
+     153,   161,   165,   170,   181,   191,   206,   214,   217,   224,
+     228,   232,   236,   244,   248,   252,   256,   260,   276,   286,
+     294,   297,   301,   308,   324,   329,   348,   362,   369,   370,
+     371,   378,   382,   383,   387,   388,   392,   393,   397,   398,
+     402,   403,   407,   408,   412,   413,   414,   418,   419,   420,
+     421,   422,   426,   427,   428,   432,   433,   434,   438,   439,
+     448,   457,   461,   462,   463,   464,   469,   472,   476,   484,
+     487,   491,   499,   503,   507
 };
 #endif
 
@@ -510,19 +515,20 @@ static const yytype_uint16 yyrline[] =
    First, the terminals, then, starting at YYNTOKENS, nonterminals.  */
 static const char *const yytname[] =
 {
-  "$end", "error", "$undefined", "DT_V1", "DT_MEMRESERVE", "DT_LSHIFT",
-  "DT_RSHIFT", "DT_LE", "DT_GE", "DT_EQ", "DT_NE", "DT_AND", "DT_OR",
-  "DT_BITS", "DT_DEL_PROP", "DT_DEL_NODE", "DT_PROPNODENAME", "DT_LITERAL",
-  "DT_CHAR_LITERAL", "DT_BYTE", "DT_STRING", "DT_LABEL", "DT_REF",
-  "DT_INCBIN", "';'", "'/'", "'{'", "'}'", "'='", "'>'", "'['", "']'",
-  "'('", "','", "')'", "'<'", "'?'", "':'", "'|'", "'^'", "'&'", "'+'",
-  "'-'", "'*'", "'%'", "'~'", "'!'", "$accept", "sourcefile",
-  "memreserves", "memreserve", "devicetree", "nodedef", "proplist",
-  "propdef", "propdata", "propdataprefix", "arrayprefix", "integer_prim",
-  "integer_expr", "integer_trinary", "integer_or", "integer_and",
-  "integer_bitor", "integer_bitxor", "integer_bitand", "integer_eq",
-  "integer_rela", "integer_shift", "integer_add", "integer_mul",
-  "integer_unary", "bytestring", "subnodes", "subnode", YY_NULLPTR
+  "$end", "error", "$undefined", "DT_V1", "DT_PLUGIN", "DT_MEMRESERVE",
+  "DT_LSHIFT", "DT_RSHIFT", "DT_LE", "DT_GE", "DT_EQ", "DT_NE", "DT_AND",
+  "DT_OR", "DT_BITS", "DT_DEL_PROP", "DT_DEL_NODE", "DT_PROPNODENAME",
+  "DT_LITERAL", "DT_CHAR_LITERAL", "DT_BYTE", "DT_STRING", "DT_LABEL",
+  "DT_REF", "DT_INCBIN", "';'", "'/'", "'{'", "'}'", "'='", "'>'", "'['",
+  "']'", "'('", "','", "')'", "'<'", "'?'", "':'", "'|'", "'^'", "'&'",
+  "'+'", "'-'", "'*'", "'%'", "'~'", "'!'", "$accept", "sourcefile",
+  "header", "headers", "memreserves", "memreserve", "devicetree",
+  "nodedef", "proplist", "propdef", "propdata", "propdataprefix",
+  "arrayprefix", "integer_prim", "integer_expr", "integer_trinary",
+  "integer_or", "integer_and", "integer_bitor", "integer_bitxor",
+  "integer_bitand", "integer_eq", "integer_rela", "integer_shift",
+  "integer_add", "integer_mul", "integer_unary", "bytestring", "subnodes",
+  "subnode", YY_NULLPTR
 };
 #endif
 
@@ -533,16 +539,16 @@ static const yytype_uint16 yytoknum[] =
 {
        0,   256,   257,   258,   259,   260,   261,   262,   263,   264,
      265,   266,   267,   268,   269,   270,   271,   272,   273,   274,
-     275,   276,   277,   278,    59,    47,   123,   125,    61,    62,
-      91,    93,    40,    44,    41,    60,    63,    58,   124,    94,
-      38,    43,    45,    42,    37,   126,    33
+     275,   276,   277,   278,   279,    59,    47,   123,   125,    61,
+      62,    91,    93,    40,    44,    41,    60,    63,    58,   124,
+      94,    38,    43,    45,    42,    37,   126,    33
 };
 # endif
 
-#define YYPACT_NINF -81
+#define YYPACT_NINF -44
 
 #define yypact_value_is_default(Yystate) \
-  (!!((Yystate) == (-81)))
+  (!!((Yystate) == (-44)))
 
 #define YYTABLE_NINF -1
 
@@ -553,21 +559,21 @@ static const yytype_uint16 yytoknum[] =
      STATE-NUM.  */
 static const yytype_int8 yypact[] =
 {
-      16,   -11,    21,    10,   -81,    25,    10,    19,    10,   -81,
-     -81,    -9,    25,   -81,     2,    51,   -81,    -9,    -9,    -9,
-     -81,     1,   -81,    -6,    50,    14,    28,    29,    36,     3,
-      58,    44,    -3,   -81,    47,   -81,   -81,    65,    68,     2,
-       2,   -81,   -81,   -81,   -81,    -9,    -9,    -9,    -9,    -9,
-      -9,    -9,    -9,    -9,    -9,    -9,    -9,    -9,    -9,    -9,
-      -9,    -9,    -9,    -9,   -81,    63,    69,     2,   -81,   -81,
-      50,    57,    14,    28,    29,    36,     3,     3,    58,    58,
-      58,    58,    44,    44,    -3,    -3,   -81,   -81,   -81,    79,
-      80,    -8,    63,   -81,    72,    63,   -81,   -81,    -9,    76,
-      77,   -81,   -81,   -81,   -81,   -81,    78,   -81,   -81,   -81,
-     -81,   -81,    35,     4,   -81,   -81,   -81,   -81,    86,   -81,
-     -81,   -81,    73,   -81,   -81,    33,    71,    84,    39,   -81,
-     -81,   -81,   -81,   -81,    41,   -81,   -81,   -81,    25,   -81,
-      74,    25,    75,   -81
+      14,    27,    61,    14,     8,    18,   -44,   -44,    37,     8,
+      40,     8,    64,   -44,   -44,   -12,    37,   -44,    50,    52,
+     -44,   -44,   -12,   -12,   -12,   -44,    51,   -44,    -4,    78,
+      53,    54,    55,    17,     2,    30,    38,    -3,   -44,    66,
+     -44,   -44,    70,    72,    50,    50,   -44,   -44,   -44,   -44,
+     -12,   -12,   -12,   -12,   -12,   -12,   -12,   -12,   -12,   -12,
+     -12,   -12,   -12,   -12,   -12,   -12,   -12,   -12,   -12,   -44,
+       3,    73,    50,   -44,   -44,    78,    59,    53,    54,    55,
+      17,     2,     2,    30,    30,    30,    30,    38,    38,    -3,
+      -3,   -44,   -44,   -44,    82,    83,    44,     3,   -44,    74,
+       3,   -44,   -44,   -12,    76,    79,   -44,   -44,   -44,   -44,
+     -44,    80,   -44,   -44,   -44,   -44,   -44,   -10,    36,   -44,
+     -44,   -44,   -44,    85,   -44,   -44,   -44,    75,   -44,   -44,
+      21,    71,    88,    -6,   -44,   -44,   -44,   -44,   -44,    11,
+     -44,   -44,   -44,    37,   -44,    77,    37,    81,   -44
 };
 
   /* YYDEFACT[STATE-NUM] -- Default reduction number in state STATE-NUM.
@@ -575,37 +581,37 @@ static const yytype_int8 yypact[] =
      means the default is an error.  */
 static const yytype_uint8 yydefact[] =
 {
-       0,     0,     0,     3,     1,     0,     0,     0,     3,    34,
-      35,     0,     0,     6,     0,     2,     4,     0,     0,     0,
-      68,     0,    37,    38,    40,    42,    44,    46,    48,    50,
-      53,    60,    63,    67,     0,    13,     7,     0,     0,     0,
-       0,    69,    70,    71,    36,     0,     0,     0,     0,     0,
+       0,     0,     0,     5,     7,     3,     1,     6,     0,     0,
+       0,     7,     0,    38,    39,     0,     0,    10,     0,     2,
+       8,     4,     0,     0,     0,    72,     0,    41,    42,    44,
+      46,    48,    50,    52,    54,    57,    64,    67,    71,     0,
+      17,    11,     0,     0,     0,     0,    73,    74,    75,    40,
        0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
-       0,     0,     0,     0,     5,    75,     0,     0,    10,     8,
-      41,     0,    43,    45,    47,    49,    51,    52,    56,    57,
-      55,    54,    58,    59,    61,    62,    65,    64,    66,     0,
-       0,     0,     0,    14,     0,    75,    11,     9,     0,     0,
-       0,    16,    26,    78,    18,    80,     0,    77,    76,    39,
-      17,    79,     0,     0,    12,    25,    15,    27,     0,    19,
-      28,    22,     0,    72,    30,     0,     0,     0,     0,    33,
-      32,    20,    31,    29,     0,    73,    74,    21,     0,    24,
-       0,     0,     0,    23
+       0,     0,     0,     0,     0,     0,     0,     0,     0,     9,
+      79,     0,     0,    14,    12,    45,     0,    47,    49,    51,
+      53,    55,    56,    60,    61,    59,    58,    62,    63,    65,
+      66,    69,    68,    70,     0,     0,     0,     0,    18,     0,
+      79,    15,    13,     0,     0,     0,    20,    30,    82,    22,
+      84,     0,    81,    80,    43,    21,    83,     0,     0,    16,
+      29,    19,    31,     0,    23,    32,    26,     0,    76,    34,
+       0,     0,     0,     0,    37,    36,    24,    35,    33,     0,
+      77,    78,    25,     0,    28,     0,     0,     0,    27
 };
 
   /* YYPGOTO[NTERM-NUM].  */
 static const yytype_int8 yypgoto[] =
 {
-     -81,   -81,   100,   104,   -81,   -38,   -81,   -80,   -81,   -81,
-     -81,    -5,    66,    13,   -81,    70,    67,    81,    64,    82,
-      37,    27,    34,    38,   -14,   -81,    22,    24
+     -44,   -44,   -44,   103,    99,   104,   -44,   -43,   -44,   -21,
+     -44,   -44,   -44,    -8,    63,     9,   -44,    65,    67,    68,
+      69,    62,    26,     4,    22,    23,   -19,   -44,    20,    28
 };
 
   /* YYDEFGOTO[NTERM-NUM].  */
 static const yytype_int16 yydefgoto[] =
 {
-      -1,     2,     7,     8,    15,    36,    65,    93,   112,   113,
-     125,    20,    21,    22,    23,    24,    25,    26,    27,    28,
-      29,    30,    31,    32,    33,   128,    94,    95
+      -1,     2,     3,     4,    10,    11,    19,    41,    70,    98,
+     117,   118,   130,    25,    26,    27,    28,    29,    30,    31,
+      32,    33,    34,    35,    36,    37,    38,   133,    99,   100
 };
 
   /* YYTABLE[YYPACT[STATE-NUM]] -- What to do in state STATE-NUM.  If
@@ -613,87 +619,87 @@ static const yytype_int16 yydefgoto[] =
      number is the opposite.  If YYTABLE_NINF, syntax error.  */
 static const yytype_uint8 yytable[] =
 {
-      12,    68,    69,    41,    42,    43,    45,    34,     9,    10,
-      53,    54,   104,     3,     5,   107,   101,   118,    35,     1,
-     102,     4,    61,    11,   119,   120,   121,   122,    35,    97,
-      46,     6,    55,    17,   123,    44,    18,    19,    56,   124,
-      62,    63,     9,    10,    14,    51,    52,    86,    87,    88,
-       9,    10,    48,   103,   129,   130,   115,    11,   135,   116,
-     136,    47,   131,    57,    58,    11,    37,    49,   117,    50,
-     137,    64,    38,    39,   138,   139,    40,    89,    90,    91,
-      78,    79,    80,    81,    92,    59,    60,    66,    76,    77,
-      67,    82,    83,    96,    98,    99,   100,    84,    85,   106,
-     110,   111,   114,   126,   134,   127,   133,   141,    16,   143,
-      13,   109,    71,    74,    72,    70,   105,   108,     0,     0,
-     132,     0,     0,     0,     0,     0,     0,     0,     0,    73,
-       0,     0,    75,   140,     0,     0,   142
+      16,    73,    74,    46,    47,    48,    13,    14,    39,    50,
+      58,    59,   120,     8,   140,   121,   141,     1,    94,    95,
+      96,    15,    12,    66,   122,    97,   142,    56,    57,   102,
+       9,    22,    60,    51,    23,    24,    62,    63,    61,    13,
+      14,    67,    68,   134,   135,   143,   144,    91,    92,    93,
+     123,   136,     5,   108,    15,    13,    14,   124,   125,   126,
+     127,     6,    83,    84,    85,    86,    18,   128,    42,   106,
+      15,    40,   129,   107,    43,    44,   109,    40,    45,   112,
+      64,    65,    81,    82,    87,    88,    49,    89,    90,    21,
+      52,    69,    53,    71,    54,    72,    55,   103,   101,   104,
+     105,   115,   111,   131,   116,   119,     7,   138,   132,   139,
+      20,   146,   114,    17,    76,    75,   148,    80,     0,    77,
+     113,    78,   137,    79,     0,   110,     0,     0,     0,     0,
+       0,     0,     0,     0,     0,   145,     0,     0,   147
 };
 
 static const yytype_int16 yycheck[] =
 {
-       5,    39,    40,    17,    18,    19,    12,    12,    17,    18,
-       7,     8,    92,    24,     4,    95,    24,    13,    26,     3,
-      28,     0,    25,    32,    20,    21,    22,    23,    26,    67,
-      36,    21,    29,    42,    30,    34,    45,    46,    35,    35,
-      43,    44,    17,    18,    25,     9,    10,    61,    62,    63,
-      17,    18,    38,    91,    21,    22,    21,    32,    19,    24,
-      21,    11,    29,     5,     6,    32,    15,    39,    33,    40,
-      31,    24,    21,    22,    33,    34,    25,    14,    15,    16,
-      53,    54,    55,    56,    21,    41,    42,    22,    51,    52,
-      22,    57,    58,    24,    37,    16,    16,    59,    60,    27,
-      24,    24,    24,    17,    20,    32,    35,    33,     8,    34,
-       6,    98,    46,    49,    47,    45,    92,    95,    -1,    -1,
-     125,    -1,    -1,    -1,    -1,    -1,    -1,    -1,    -1,    48,
-      -1,    -1,    50,   138,    -1,    -1,   141
+       8,    44,    45,    22,    23,    24,    18,    19,    16,    13,
+       8,     9,    22,     5,    20,    25,    22,     3,    15,    16,
+      17,    33,     4,    26,    34,    22,    32,    10,    11,    72,
+      22,    43,    30,    37,    46,    47,     6,     7,    36,    18,
+      19,    44,    45,    22,    23,    34,    35,    66,    67,    68,
+      14,    30,    25,    96,    33,    18,    19,    21,    22,    23,
+      24,     0,    58,    59,    60,    61,    26,    31,    16,    25,
+      33,    27,    36,    29,    22,    23,    97,    27,    26,   100,
+      42,    43,    56,    57,    62,    63,    35,    64,    65,    25,
+      12,    25,    39,    23,    40,    23,    41,    38,    25,    17,
+      17,    25,    28,    18,    25,    25,     3,    36,    33,    21,
+      11,    34,   103,     9,    51,    50,    35,    55,    -1,    52,
+     100,    53,   130,    54,    -1,    97,    -1,    -1,    -1,    -1,
+      -1,    -1,    -1,    -1,    -1,   143,    -1,    -1,   146
 };
 
   /* YYSTOS[STATE-NUM] -- The (internal number of the) accessing
      symbol of state STATE-NUM.  */
 static const yytype_uint8 yystos[] =
 {
-       0,     3,    48,    24,     0,     4,    21,    49,    50,    17,
-      18,    32,    58,    50,    25,    51,    49,    42,    45,    46,
-      58,    59,    60,    61,    62,    63,    64,    65,    66,    67,
-      68,    69,    70,    71,    58,    26,    52,    15,    21,    22,
-      25,    71,    71,    71,    34,    12,    36,    11,    38,    39,
-      40,     9,    10,     7,     8,    29,    35,     5,     6,    41,
-      42,    25,    43,    44,    24,    53,    22,    22,    52,    52,
-      62,    59,    63,    64,    65,    66,    67,    67,    68,    68,
-      68,    68,    69,    69,    70,    70,    71,    71,    71,    14,
-      15,    16,    21,    54,    73,    74,    24,    52,    37,    16,
-      16,    24,    28,    52,    54,    74,    27,    54,    73,    60,
-      24,    24,    55,    56,    24,    21,    24,    33,    13,    20,
-      21,    22,    23,    30,    35,    57,    17,    32,    72,    21,
-      22,    29,    58,    35,    20,    19,    21,    31,    33,    34,
-      58,    33,    58,    34
+       0,     3,    49,    50,    51,    25,     0,    51,     5,    22,
+      52,    53,     4,    18,    19,    33,    61,    53,    26,    54,
+      52,    25,    43,    46,    47,    61,    62,    63,    64,    65,
+      66,    67,    68,    69,    70,    71,    72,    73,    74,    61,
+      27,    55,    16,    22,    23,    26,    74,    74,    74,    35,
+      13,    37,    12,    39,    40,    41,    10,    11,     8,     9,
+      30,    36,     6,     7,    42,    43,    26,    44,    45,    25,
+      56,    23,    23,    55,    55,    65,    62,    66,    67,    68,
+      69,    70,    70,    71,    71,    71,    71,    72,    72,    73,
+      73,    74,    74,    74,    15,    16,    17,    22,    57,    76,
+      77,    25,    55,    38,    17,    17,    25,    29,    55,    57,
+      77,    28,    57,    76,    63,    25,    25,    58,    59,    25,
+      22,    25,    34,    14,    21,    22,    23,    24,    31,    36,
+      60,    18,    33,    75,    22,    23,    30,    61,    36,    21,
+      20,    22,    32,    34,    35,    61,    34,    61,    35
 };
 
   /* YYR1[YYN] -- Symbol number of symbol that rule YYN derives.  */
 static const yytype_uint8 yyr1[] =
 {
-       0,    47,    48,    49,    49,    50,    50,    51,    51,    51,
-      51,    51,    52,    53,    53,    54,    54,    54,    54,    55,
-      55,    55,    55,    55,    55,    55,    56,    56,    56,    57,
-      57,    57,    57,    57,    58,    58,    58,    59,    60,    60,
-      61,    61,    62,    62,    63,    63,    64,    64,    65,    65,
-      66,    66,    66,    67,    67,    67,    67,    67,    68,    68,
-      68,    69,    69,    69,    70,    70,    70,    70,    71,    71,
-      71,    71,    72,    72,    72,    73,    73,    73,    74,    74,
-      74
+       0,    48,    49,    50,    50,    51,    51,    52,    52,    53,
+      53,    54,    54,    54,    54,    54,    55,    56,    56,    57,
+      57,    57,    57,    58,    58,    58,    58,    58,    58,    58,
+      59,    59,    59,    60,    60,    60,    60,    60,    61,    61,
+      61,    62,    63,    63,    64,    64,    65,    65,    66,    66,
+      67,    67,    68,    68,    69,    69,    69,    70,    70,    70,
+      70,    70,    71,    71,    71,    72,    72,    72,    73,    73,
+      73,    73,    74,    74,    74,    74,    75,    75,    75,    76,
+      76,    76,    77,    77,    77
 };
 
   /* YYR2[YYN] -- Number of symbols on the right hand side of rule YYN.  */
 static const yytype_uint8 yyr2[] =
 {
-       0,     2,     4,     0,     2,     4,     2,     2,     3,     4,
-       3,     4,     5,     0,     2,     4,     2,     3,     2,     2,
-       3,     4,     2,     9,     5,     2,     0,     2,     2,     3,
-       1,     2,     2,     2,     1,     1,     3,     1,     1,     5,
-       1,     3,     1,     3,     1,     3,     1,     3,     1,     3,
-       1,     3,     3,     1,     3,     3,     3,     3,     3,     3,
-       1,     3,     3,     1,     3,     3,     3,     1,     1,     2,
-       2,     2,     0,     2,     2,     0,     2,     2,     2,     3,
-       2
+       0,     2,     3,     2,     4,     1,     2,     0,     2,     4,
+       2,     2,     3,     4,     3,     4,     5,     0,     2,     4,
+       2,     3,     2,     2,     3,     4,     2,     9,     5,     2,
+       0,     2,     2,     3,     1,     2,     2,     2,     1,     1,
+       3,     1,     1,     5,     1,     3,     1,     3,     1,     3,
+       1,     3,     1,     3,     1,     3,     3,     1,     3,     3,
+       3,     3,     3,     3,     1,     3,     3,     1,     3,     3,
+       3,     1,     1,     2,     2,     2,     0,     2,     2,     0,
+       2,     2,     2,     3,     2
 };
 
 
@@ -1463,80 +1469,106 @@ yyreduce:
   switch (yyn)
     {
         case 2:
-#line 105 "dtc-parser.y" /* yacc.c:1646  */
+#line 110 "dtc-parser.y" /* yacc.c:1646  */
     {
-			the_boot_info = build_boot_info((yyvsp[-1].re), (yyvsp[0].node),
-							guess_boot_cpuid((yyvsp[0].node)));
+			parser_output = build_dt_info((yyvsp[-2].flags), (yyvsp[-1].re), (yyvsp[0].node),
+			                              guess_boot_cpuid((yyvsp[0].node)));
 		}
-#line 1472 "dtc-parser.tab.c" /* yacc.c:1646  */
+#line 1478 "dtc-parser.tab.c" /* yacc.c:1646  */
     break;
 
   case 3:
-#line 113 "dtc-parser.y" /* yacc.c:1646  */
+#line 118 "dtc-parser.y" /* yacc.c:1646  */
     {
-			(yyval.re) = NULL;
+			(yyval.flags) = DTSF_V1;
 		}
-#line 1480 "dtc-parser.tab.c" /* yacc.c:1646  */
+#line 1486 "dtc-parser.tab.c" /* yacc.c:1646  */
     break;
 
   case 4:
-#line 117 "dtc-parser.y" /* yacc.c:1646  */
+#line 122 "dtc-parser.y" /* yacc.c:1646  */
+    {
+			(yyval.flags) = DTSF_V1 | DTSF_PLUGIN;
+		}
+#line 1494 "dtc-parser.tab.c" /* yacc.c:1646  */
+    break;
+
+  case 6:
+#line 130 "dtc-parser.y" /* yacc.c:1646  */
+    {
+			if ((yyvsp[0].flags) != (yyvsp[-1].flags))
+				ERROR(&(yylsp[0]), "Header flags don't match earlier ones");
+			(yyval.flags) = (yyvsp[-1].flags);
+		}
+#line 1504 "dtc-parser.tab.c" /* yacc.c:1646  */
+    break;
+
+  case 7:
+#line 139 "dtc-parser.y" /* yacc.c:1646  */
+    {
+			(yyval.re) = NULL;
+		}
+#line 1512 "dtc-parser.tab.c" /* yacc.c:1646  */
+    break;
+
+  case 8:
+#line 143 "dtc-parser.y" /* yacc.c:1646  */
     {
 			(yyval.re) = chain_reserve_entry((yyvsp[-1].re), (yyvsp[0].re));
 		}
-#line 1488 "dtc-parser.tab.c" /* yacc.c:1646  */
+#line 1520 "dtc-parser.tab.c" /* yacc.c:1646  */
     break;
 
-  case 5:
-#line 124 "dtc-parser.y" /* yacc.c:1646  */
+  case 9:
+#line 150 "dtc-parser.y" /* yacc.c:1646  */
     {
 			(yyval.re) = build_reserve_entry((yyvsp[-2].integer), (yyvsp[-1].integer));
 		}
-#line 1496 "dtc-parser.tab.c" /* yacc.c:1646  */
+#line 1528 "dtc-parser.tab.c" /* yacc.c:1646  */
     break;
 
-  case 6:
-#line 128 "dtc-parser.y" /* yacc.c:1646  */
+  case 10:
+#line 154 "dtc-parser.y" /* yacc.c:1646  */
     {
 			add_label(&(yyvsp[0].re)->labels, (yyvsp[-1].labelref));
 			(yyval.re) = (yyvsp[0].re);
 		}
-#line 1505 "dtc-parser.tab.c" /* yacc.c:1646  */
+#line 1537 "dtc-parser.tab.c" /* yacc.c:1646  */
     break;
 
-  case 7:
-#line 136 "dtc-parser.y" /* yacc.c:1646  */
+  case 11:
+#line 162 "dtc-parser.y" /* yacc.c:1646  */
     {
 			(yyval.node) = name_node((yyvsp[0].node), "");
 		}
-#line 1513 "dtc-parser.tab.c" /* yacc.c:1646  */
+#line 1545 "dtc-parser.tab.c" /* yacc.c:1646  */
     break;
 
-  case 8:
-#line 140 "dtc-parser.y" /* yacc.c:1646  */
+  case 12:
+#line 166 "dtc-parser.y" /* yacc.c:1646  */
     {
 			(yyval.node) = merge_nodes((yyvsp[-2].node), (yyvsp[0].node));
 		}
-#line 1521 "dtc-parser.tab.c" /* yacc.c:1646  */
+#line 1553 "dtc-parser.tab.c" /* yacc.c:1646  */
     break;
 
-  case 9:
-#line 145 "dtc-parser.y" /* yacc.c:1646  */
+  case 13:
+#line 171 "dtc-parser.y" /* yacc.c:1646  */
     {
 			struct node *target = get_node_by_ref((yyvsp[-3].node), (yyvsp[-1].labelref));
 
-			add_label(&target->labels, (yyvsp[-2].labelref));
-			if (target)
+			if (target) {
+				add_label(&target->labels, (yyvsp[-2].labelref));
 				merge_nodes(target, (yyvsp[0].node));
-			else
+			} else
 				ERROR(&(yylsp[-1]), "Label or path %s not found", (yyvsp[-1].labelref));
 			(yyval.node) = (yyvsp[-3].node);
 		}
-#line 1536 "dtc-parser.tab.c" /* yacc.c:1646  */
+#line 1568 "dtc-parser.tab.c" /* yacc.c:1646  */
     break;
 
-  case 10:
-#line 156 "dtc-parser.y" /* yacc.c:1646  */
+  case 14:
+#line 182 "dtc-parser.y" /* yacc.c:1646  */
     {
 			struct node *target = get_node_by_ref((yyvsp[-2].node), (yyvsp[-1].labelref));
 
@@ -1546,11 +1578,11 @@ yyreduce:
 				ERROR(&(yylsp[-1]), "Label or path %s not found", (yyvsp[-1].labelref));
 			(yyval.node) = (yyvsp[-2].node);
 		}
-#line 1550 "dtc-parser.tab.c" /* yacc.c:1646  */
+#line 1582 "dtc-parser.tab.c" /* yacc.c:1646  */
     break;
 
-  case 11:
-#line 166 "dtc-parser.y" /* yacc.c:1646  */
+  case 15:
+#line 192 "dtc-parser.y" /* yacc.c:1646  */
     {
 			struct node *target = get_node_by_ref((yyvsp[-3].node), (yyvsp[-1].labelref));
 
@@ -1562,100 +1594,100 @@ yyreduce:
 
 			(yyval.node) = (yyvsp[-3].node);
 		}
-#line 1566 "dtc-parser.tab.c" /* yacc.c:1646  */
+#line 1598 "dtc-parser.tab.c" /* yacc.c:1646  */
     break;
 
-  case 12:
-#line 181 "dtc-parser.y" /* yacc.c:1646  */
+  case 16:
+#line 207 "dtc-parser.y" /* yacc.c:1646  */
     {
 			(yyval.node) = build_node((yyvsp[-3].proplist), (yyvsp[-2].nodelist));
 		}
-#line 1574 "dtc-parser.tab.c" /* yacc.c:1646  */
+#line 1606 "dtc-parser.tab.c" /* yacc.c:1646  */
     break;
 
-  case 13:
-#line 188 "dtc-parser.y" /* yacc.c:1646  */
+  case 17:
+#line 214 "dtc-parser.y" /* yacc.c:1646  */
     {
 			(yyval.proplist) = NULL;
 		}
-#line 1582 "dtc-parser.tab.c" /* yacc.c:1646  */
+#line 1614 "dtc-parser.tab.c" /* yacc.c:1646  */
     break;
 
-  case 14:
-#line 192 "dtc-parser.y" /* yacc.c:1646  */
+  case 18:
+#line 218 "dtc-parser.y" /* yacc.c:1646  */
     {
 			(yyval.proplist) = chain_property((yyvsp[0].prop), (yyvsp[-1].proplist));
 		}
-#line 1590 "dtc-parser.tab.c" /* yacc.c:1646  */
+#line 1622 "dtc-parser.tab.c" /* yacc.c:1646  */
     break;
 
-  case 15:
-#line 199 "dtc-parser.y" /* yacc.c:1646  */
+  case 19:
+#line 225 "dtc-parser.y" /* yacc.c:1646  */
     {
 			(yyval.prop) = build_property((yyvsp[-3].propnodename), (yyvsp[-1].data));
 		}
-#line 1598 "dtc-parser.tab.c" /* yacc.c:1646  */
+#line 1630 "dtc-parser.tab.c" /* yacc.c:1646  */
     break;
 
-  case 16:
-#line 203 "dtc-parser.y" /* yacc.c:1646  */
+  case 20:
+#line 229 "dtc-parser.y" /* yacc.c:1646  */
     {
 			(yyval.prop) = build_property((yyvsp[-1].propnodename), empty_data);
 		}
-#line 1606 "dtc-parser.tab.c" /* yacc.c:1646  */
+#line 1638 "dtc-parser.tab.c" /* yacc.c:1646  */
     break;
 
-  case 17:
-#line 207 "dtc-parser.y" /* yacc.c:1646  */
+  case 21:
+#line 233 "dtc-parser.y" /* yacc.c:1646  */
     {
 			(yyval.prop) = build_property_delete((yyvsp[-1].propnodename));
 		}
-#line 1614 "dtc-parser.tab.c" /* yacc.c:1646  */
+#line 1646 "dtc-parser.tab.c" /* yacc.c:1646  */
     break;
 
-  case 18:
-#line 211 "dtc-parser.y" /* yacc.c:1646  */
+  case 22:
+#line 237 "dtc-parser.y" /* yacc.c:1646  */
     {
 			add_label(&(yyvsp[0].prop)->labels, (yyvsp[-1].labelref));
 			(yyval.prop) = (yyvsp[0].prop);
 		}
-#line 1623 "dtc-parser.tab.c" /* yacc.c:1646  */
+#line 1655 "dtc-parser.tab.c" /* yacc.c:1646  */
     break;
 
-  case 19:
-#line 219 "dtc-parser.y" /* yacc.c:1646  */
+  case 23:
+#line 245 "dtc-parser.y" /* yacc.c:1646  */
     {
 			(yyval.data) = data_merge((yyvsp[-1].data), (yyvsp[0].data));
 		}
-#line 1631 "dtc-parser.tab.c" /* yacc.c:1646  */
+#line 1663 "dtc-parser.tab.c" /* yacc.c:1646  */
     break;
 
-  case 20:
-#line 223 "dtc-parser.y" /* yacc.c:1646  */
+  case 24:
+#line 249 "dtc-parser.y" /* yacc.c:1646  */
     {
 			(yyval.data) = data_merge((yyvsp[-2].data), (yyvsp[-1].array).data);
 		}
-#line 1639 "dtc-parser.tab.c" /* yacc.c:1646  */
+#line 1671 "dtc-parser.tab.c" /* yacc.c:1646  */
     break;
 
-  case 21:
-#line 227 "dtc-parser.y" /* yacc.c:1646  */
+  case 25:
+#line 253 "dtc-parser.y" /* yacc.c:1646  */
     {
 			(yyval.data) = data_merge((yyvsp[-3].data), (yyvsp[-1].data));
 		}
-#line 1647 "dtc-parser.tab.c" /* yacc.c:1646  */
+#line 1679 "dtc-parser.tab.c" /* yacc.c:1646  */
     break;
 
-  case 22:
-#line 231 "dtc-parser.y" /* yacc.c:1646  */
+  case 26:
+#line 257 "dtc-parser.y" /* yacc.c:1646  */
     {
 			(yyval.data) = data_add_marker((yyvsp[-1].data), REF_PATH, (yyvsp[0].labelref));
 		}
-#line 1655 "dtc-parser.tab.c" /* yacc.c:1646  */
+#line 1687 "dtc-parser.tab.c" /* yacc.c:1646  */
     break;
 
-  case 23:
-#line 235 "dtc-parser.y" /* yacc.c:1646  */
+  case 27:
+#line 261 "dtc-parser.y" /* yacc.c:1646  */
     {
 			FILE *f = srcfile_relative_open((yyvsp[-5].data).val, NULL);
 			struct data d;
@@ -1671,11 +1703,11 @@ yyreduce:
 			(yyval.data) = data_merge((yyvsp[-8].data), d);
 			fclose(f);
 		}
-#line 1675 "dtc-parser.tab.c" /* yacc.c:1646  */
+#line 1707 "dtc-parser.tab.c" /* yacc.c:1646  */
     break;
 
-  case 24:
-#line 251 "dtc-parser.y" /* yacc.c:1646  */
+  case 28:
+#line 277 "dtc-parser.y" /* yacc.c:1646  */
     {
 			FILE *f = srcfile_relative_open((yyvsp[-1].data).val, NULL);
 			struct data d = empty_data;
@@ -1685,43 +1717,43 @@ yyreduce:
 			(yyval.data) = data_merge((yyvsp[-4].data), d);
 			fclose(f);
 		}
-#line 1689 "dtc-parser.tab.c" /* yacc.c:1646  */
+#line 1721 "dtc-parser.tab.c" /* yacc.c:1646  */
     break;
 
-  case 25:
-#line 261 "dtc-parser.y" /* yacc.c:1646  */
+  case 29:
+#line 287 "dtc-parser.y" /* yacc.c:1646  */
     {
 			(yyval.data) = data_add_marker((yyvsp[-1].data), LABEL, (yyvsp[0].labelref));
 		}
-#line 1697 "dtc-parser.tab.c" /* yacc.c:1646  */
+#line 1729 "dtc-parser.tab.c" /* yacc.c:1646  */
     break;
 
-  case 26:
-#line 268 "dtc-parser.y" /* yacc.c:1646  */
+  case 30:
+#line 294 "dtc-parser.y" /* yacc.c:1646  */
     {
 			(yyval.data) = empty_data;
 		}
-#line 1705 "dtc-parser.tab.c" /* yacc.c:1646  */
+#line 1737 "dtc-parser.tab.c" /* yacc.c:1646  */
     break;
 
-  case 27:
-#line 272 "dtc-parser.y" /* yacc.c:1646  */
+  case 31:
+#line 298 "dtc-parser.y" /* yacc.c:1646  */
     {
 			(yyval.data) = (yyvsp[-1].data);
 		}
-#line 1713 "dtc-parser.tab.c" /* yacc.c:1646  */
+#line 1745 "dtc-parser.tab.c" /* yacc.c:1646  */
     break;
 
-  case 28:
-#line 276 "dtc-parser.y" /* yacc.c:1646  */
+  case 32:
+#line 302 "dtc-parser.y" /* yacc.c:1646  */
     {
 			(yyval.data) = data_add_marker((yyvsp[-1].data), LABEL, (yyvsp[0].labelref));
 		}
-#line 1721 "dtc-parser.tab.c" /* yacc.c:1646  */
+#line 1753 "dtc-parser.tab.c" /* yacc.c:1646  */
     break;
 
-  case 29:
-#line 283 "dtc-parser.y" /* yacc.c:1646  */
+  case 33:
+#line 309 "dtc-parser.y" /* yacc.c:1646  */
     {
 			unsigned long long bits;
 
@@ -1737,20 +1769,20 @@ yyreduce:
 			(yyval.array).data = empty_data;
 			(yyval.array).bits = bits;
 		}
-#line 1741 "dtc-parser.tab.c" /* yacc.c:1646  */
+#line 1773 "dtc-parser.tab.c" /* yacc.c:1646  */
     break;
 
-  case 30:
-#line 299 "dtc-parser.y" /* yacc.c:1646  */
+  case 34:
+#line 325 "dtc-parser.y" /* yacc.c:1646  */
     {
 			(yyval.array).data = empty_data;
 			(yyval.array).bits = 32;
 		}
-#line 1750 "dtc-parser.tab.c" /* yacc.c:1646  */
+#line 1782 "dtc-parser.tab.c" /* yacc.c:1646  */
     break;
 
-  case 31:
-#line 304 "dtc-parser.y" /* yacc.c:1646  */
+  case 35:
+#line 330 "dtc-parser.y" /* yacc.c:1646  */
     {
 			if ((yyvsp[-1].array).bits < 64) {
 				uint64_t mask = (1ULL << (yyvsp[-1].array).bits) - 1;
@@ -1769,11 +1801,11 @@ yyreduce:
 
 			(yyval.array).data = data_append_integer((yyvsp[-1].array).data, (yyvsp[0].integer), (yyvsp[-1].array).bits);
 		}
-#line 1773 "dtc-parser.tab.c" /* yacc.c:1646  */
+#line 1805 "dtc-parser.tab.c" /* yacc.c:1646  */
     break;
 
-  case 32:
-#line 323 "dtc-parser.y" /* yacc.c:1646  */
+  case 36:
+#line 349 "dtc-parser.y" /* yacc.c:1646  */
     {
 			uint64_t val = ~0ULL >> (64 - (yyvsp[-1].array).bits);
 
@@ -1787,129 +1819,129 @@ yyreduce:
 
 			(yyval.array).data = data_append_integer((yyvsp[-1].array).data, val, (yyvsp[-1].array).bits);
 		}
-#line 1791 "dtc-parser.tab.c" /* yacc.c:1646  */
+#line 1823 "dtc-parser.tab.c" /* yacc.c:1646  */
     break;
 
-  case 33:
-#line 337 "dtc-parser.y" /* yacc.c:1646  */
+  case 37:
+#line 363 "dtc-parser.y" /* yacc.c:1646  */
     {
 			(yyval.array).data = data_add_marker((yyvsp[-1].array).data, LABEL, (yyvsp[0].labelref));
 		}
-#line 1799 "dtc-parser.tab.c" /* yacc.c:1646  */
+#line 1831 "dtc-parser.tab.c" /* yacc.c:1646  */
     break;
 
-  case 36:
-#line 346 "dtc-parser.y" /* yacc.c:1646  */
+  case 40:
+#line 372 "dtc-parser.y" /* yacc.c:1646  */
     {
 			(yyval.integer) = (yyvsp[-1].integer);
 		}
-#line 1807 "dtc-parser.tab.c" /* yacc.c:1646  */
+#line 1839 "dtc-parser.tab.c" /* yacc.c:1646  */
     break;
 
-  case 39:
-#line 357 "dtc-parser.y" /* yacc.c:1646  */
+  case 43:
+#line 383 "dtc-parser.y" /* yacc.c:1646  */
     { (yyval.integer) = (yyvsp[-4].integer) ? (yyvsp[-2].integer) : (yyvsp[0].integer); }
-#line 1813 "dtc-parser.tab.c" /* yacc.c:1646  */
+#line 1845 "dtc-parser.tab.c" /* yacc.c:1646  */
     break;
 
-  case 41:
-#line 362 "dtc-parser.y" /* yacc.c:1646  */
+  case 45:
+#line 388 "dtc-parser.y" /* yacc.c:1646  */
     { (yyval.integer) = (yyvsp[-2].integer) || (yyvsp[0].integer); }
-#line 1819 "dtc-parser.tab.c" /* yacc.c:1646  */
+#line 1851 "dtc-parser.tab.c" /* yacc.c:1646  */
     break;
 
-  case 43:
-#line 367 "dtc-parser.y" /* yacc.c:1646  */
+  case 47:
+#line 393 "dtc-parser.y" /* yacc.c:1646  */
     { (yyval.integer) = (yyvsp[-2].integer) && (yyvsp[0].integer); }
-#line 1825 "dtc-parser.tab.c" /* yacc.c:1646  */
+#line 1857 "dtc-parser.tab.c" /* yacc.c:1646  */
     break;
 
-  case 45:
-#line 372 "dtc-parser.y" /* yacc.c:1646  */
+  case 49:
+#line 398 "dtc-parser.y" /* yacc.c:1646  */
     { (yyval.integer) = (yyvsp[-2].integer) | (yyvsp[0].integer); }
-#line 1831 "dtc-parser.tab.c" /* yacc.c:1646  */
+#line 1863 "dtc-parser.tab.c" /* yacc.c:1646  */
     break;
 
-  case 47:
-#line 377 "dtc-parser.y" /* yacc.c:1646  */
+  case 51:
+#line 403 "dtc-parser.y" /* yacc.c:1646  */
     { (yyval.integer) = (yyvsp[-2].integer) ^ (yyvsp[0].integer); }
-#line 1837 "dtc-parser.tab.c" /* yacc.c:1646  */
+#line 1869 "dtc-parser.tab.c" /* yacc.c:1646  */
     break;
 
-  case 49:
-#line 382 "dtc-parser.y" /* yacc.c:1646  */
+  case 53:
+#line 408 "dtc-parser.y" /* yacc.c:1646  */
     { (yyval.integer) = (yyvsp[-2].integer) & (yyvsp[0].integer); }
-#line 1843 "dtc-parser.tab.c" /* yacc.c:1646  */
+#line 1875 "dtc-parser.tab.c" /* yacc.c:1646  */
     break;
 
-  case 51:
-#line 387 "dtc-parser.y" /* yacc.c:1646  */
+  case 55:
+#line 413 "dtc-parser.y" /* yacc.c:1646  */
     { (yyval.integer) = (yyvsp[-2].integer) == (yyvsp[0].integer); }
-#line 1849 "dtc-parser.tab.c" /* yacc.c:1646  */
+#line 1881 "dtc-parser.tab.c" /* yacc.c:1646  */
     break;
 
-  case 52:
-#line 388 "dtc-parser.y" /* yacc.c:1646  */
+  case 56:
+#line 414 "dtc-parser.y" /* yacc.c:1646  */
     { (yyval.integer) = (yyvsp[-2].integer) != (yyvsp[0].integer); }
-#line 1855 "dtc-parser.tab.c" /* yacc.c:1646  */
+#line 1887 "dtc-parser.tab.c" /* yacc.c:1646  */
     break;
 
-  case 54:
-#line 393 "dtc-parser.y" /* yacc.c:1646  */
+  case 58:
+#line 419 "dtc-parser.y" /* yacc.c:1646  */
     { (yyval.integer) = (yyvsp[-2].integer) < (yyvsp[0].integer); }
-#line 1861 "dtc-parser.tab.c" /* yacc.c:1646  */
+#line 1893 "dtc-parser.tab.c" /* yacc.c:1646  */
     break;
 
-  case 55:
-#line 394 "dtc-parser.y" /* yacc.c:1646  */
+  case 59:
+#line 420 "dtc-parser.y" /* yacc.c:1646  */
     { (yyval.integer) = (yyvsp[-2].integer) > (yyvsp[0].integer); }
-#line 1867 "dtc-parser.tab.c" /* yacc.c:1646  */
+#line 1899 "dtc-parser.tab.c" /* yacc.c:1646  */
     break;
 
-  case 56:
-#line 395 "dtc-parser.y" /* yacc.c:1646  */
+  case 60:
+#line 421 "dtc-parser.y" /* yacc.c:1646  */
     { (yyval.integer) = (yyvsp[-2].integer) <= (yyvsp[0].integer); }
-#line 1873 "dtc-parser.tab.c" /* yacc.c:1646  */
+#line 1905 "dtc-parser.tab.c" /* yacc.c:1646  */
     break;
 
-  case 57:
-#line 396 "dtc-parser.y" /* yacc.c:1646  */
+  case 61:
+#line 422 "dtc-parser.y" /* yacc.c:1646  */
     { (yyval.integer) = (yyvsp[-2].integer) >= (yyvsp[0].integer); }
-#line 1879 "dtc-parser.tab.c" /* yacc.c:1646  */
+#line 1911 "dtc-parser.tab.c" /* yacc.c:1646  */
     break;
 
-  case 58:
-#line 400 "dtc-parser.y" /* yacc.c:1646  */
+  case 62:
+#line 426 "dtc-parser.y" /* yacc.c:1646  */
     { (yyval.integer) = (yyvsp[-2].integer) << (yyvsp[0].integer); }
-#line 1885 "dtc-parser.tab.c" /* yacc.c:1646  */
+#line 1917 "dtc-parser.tab.c" /* yacc.c:1646  */
     break;
 
-  case 59:
-#line 401 "dtc-parser.y" /* yacc.c:1646  */
+  case 63:
+#line 427 "dtc-parser.y" /* yacc.c:1646  */
     { (yyval.integer) = (yyvsp[-2].integer) >> (yyvsp[0].integer); }
-#line 1891 "dtc-parser.tab.c" /* yacc.c:1646  */
+#line 1923 "dtc-parser.tab.c" /* yacc.c:1646  */
     break;
 
-  case 61:
-#line 406 "dtc-parser.y" /* yacc.c:1646  */
+  case 65:
+#line 432 "dtc-parser.y" /* yacc.c:1646  */
     { (yyval.integer) = (yyvsp[-2].integer) + (yyvsp[0].integer); }
-#line 1897 "dtc-parser.tab.c" /* yacc.c:1646  */
+#line 1929 "dtc-parser.tab.c" /* yacc.c:1646  */
     break;
 
-  case 62:
-#line 407 "dtc-parser.y" /* yacc.c:1646  */
+  case 66:
+#line 433 "dtc-parser.y" /* yacc.c:1646  */
     { (yyval.integer) = (yyvsp[-2].integer) - (yyvsp[0].integer); }
-#line 1903 "dtc-parser.tab.c" /* yacc.c:1646  */
+#line 1935 "dtc-parser.tab.c" /* yacc.c:1646  */
     break;
 
-  case 64:
-#line 412 "dtc-parser.y" /* yacc.c:1646  */
+  case 68:
+#line 438 "dtc-parser.y" /* yacc.c:1646  */
     { (yyval.integer) = (yyvsp[-2].integer) * (yyvsp[0].integer); }
-#line 1909 "dtc-parser.tab.c" /* yacc.c:1646  */
+#line 1941 "dtc-parser.tab.c" /* yacc.c:1646  */
     break;
 
-  case 65:
-#line 414 "dtc-parser.y" /* yacc.c:1646  */
+  case 69:
+#line 440 "dtc-parser.y" /* yacc.c:1646  */
     {
 			if ((yyvsp[0].integer) != 0) {
 				(yyval.integer) = (yyvsp[-2].integer) / (yyvsp[0].integer);
@@ -1918,11 +1950,11 @@ yyreduce:
 				(yyval.integer) = 0;
 			}
 		}
-#line 1922 "dtc-parser.tab.c" /* yacc.c:1646  */
+#line 1954 "dtc-parser.tab.c" /* yacc.c:1646  */
     break;
 
-  case 66:
-#line 423 "dtc-parser.y" /* yacc.c:1646  */
+  case 70:
+#line 449 "dtc-parser.y" /* yacc.c:1646  */
     {
 			if ((yyvsp[0].integer) != 0) {
 				(yyval.integer) = (yyvsp[-2].integer) % (yyvsp[0].integer);
@@ -1931,103 +1963,103 @@ yyreduce:
 				(yyval.integer) = 0;
 			}
 		}
-#line 1935 "dtc-parser.tab.c" /* yacc.c:1646  */
+#line 1967 "dtc-parser.tab.c" /* yacc.c:1646  */
     break;
 
-  case 69:
-#line 436 "dtc-parser.y" /* yacc.c:1646  */
+  case 73:
+#line 462 "dtc-parser.y" /* yacc.c:1646  */
     { (yyval.integer) = -(yyvsp[0].integer); }
-#line 1941 "dtc-parser.tab.c" /* yacc.c:1646  */
+#line 1973 "dtc-parser.tab.c" /* yacc.c:1646  */
     break;
 
-  case 70:
-#line 437 "dtc-parser.y" /* yacc.c:1646  */
+  case 74:
+#line 463 "dtc-parser.y" /* yacc.c:1646  */
     { (yyval.integer) = ~(yyvsp[0].integer); }
-#line 1947 "dtc-parser.tab.c" /* yacc.c:1646  */
+#line 1979 "dtc-parser.tab.c" /* yacc.c:1646  */
     break;
 
-  case 71:
-#line 438 "dtc-parser.y" /* yacc.c:1646  */
+  case 75:
+#line 464 "dtc-parser.y" /* yacc.c:1646  */
     { (yyval.integer) = !(yyvsp[0].integer); }
-#line 1953 "dtc-parser.tab.c" /* yacc.c:1646  */
+#line 1985 "dtc-parser.tab.c" /* yacc.c:1646  */
     break;
 
-  case 72:
-#line 443 "dtc-parser.y" /* yacc.c:1646  */
+  case 76:
+#line 469 "dtc-parser.y" /* yacc.c:1646  */
     {
 			(yyval.data) = empty_data;
 		}
-#line 1961 "dtc-parser.tab.c" /* yacc.c:1646  */
+#line 1993 "dtc-parser.tab.c" /* yacc.c:1646  */
     break;
 
-  case 73:
-#line 447 "dtc-parser.y" /* yacc.c:1646  */
+  case 77:
+#line 473 "dtc-parser.y" /* yacc.c:1646  */
     {
 			(yyval.data) = data_append_byte((yyvsp[-1].data), (yyvsp[0].byte));
 		}
-#line 1969 "dtc-parser.tab.c" /* yacc.c:1646  */
+#line 2001 "dtc-parser.tab.c" /* yacc.c:1646  */
     break;
 
-  case 74:
-#line 451 "dtc-parser.y" /* yacc.c:1646  */
+  case 78:
+#line 477 "dtc-parser.y" /* yacc.c:1646  */
     {
 			(yyval.data) = data_add_marker((yyvsp[-1].data), LABEL, (yyvsp[0].labelref));
 		}
-#line 1977 "dtc-parser.tab.c" /* yacc.c:1646  */
+#line 2009 "dtc-parser.tab.c" /* yacc.c:1646  */
     break;
 
-  case 75:
-#line 458 "dtc-parser.y" /* yacc.c:1646  */
+  case 79:
+#line 484 "dtc-parser.y" /* yacc.c:1646  */
     {
 			(yyval.nodelist) = NULL;
 		}
-#line 1985 "dtc-parser.tab.c" /* yacc.c:1646  */
+#line 2017 "dtc-parser.tab.c" /* yacc.c:1646  */
     break;
 
-  case 76:
-#line 462 "dtc-parser.y" /* yacc.c:1646  */
+  case 80:
+#line 488 "dtc-parser.y" /* yacc.c:1646  */
     {
 			(yyval.nodelist) = chain_node((yyvsp[-1].node), (yyvsp[0].nodelist));
 		}
-#line 1993 "dtc-parser.tab.c" /* yacc.c:1646  */
+#line 2025 "dtc-parser.tab.c" /* yacc.c:1646  */
     break;
 
-  case 77:
-#line 466 "dtc-parser.y" /* yacc.c:1646  */
+  case 81:
+#line 492 "dtc-parser.y" /* yacc.c:1646  */
     {
 			ERROR(&(yylsp[0]), "Properties must precede subnodes");
 			YYERROR;
 		}
-#line 2002 "dtc-parser.tab.c" /* yacc.c:1646  */
+#line 2034 "dtc-parser.tab.c" /* yacc.c:1646  */
     break;
 
-  case 78:
-#line 474 "dtc-parser.y" /* yacc.c:1646  */
+  case 82:
+#line 500 "dtc-parser.y" /* yacc.c:1646  */
     {
 			(yyval.node) = name_node((yyvsp[0].node), (yyvsp[-1].propnodename));
 		}
-#line 2010 "dtc-parser.tab.c" /* yacc.c:1646  */
+#line 2042 "dtc-parser.tab.c" /* yacc.c:1646  */
     break;
 
-  case 79:
-#line 478 "dtc-parser.y" /* yacc.c:1646  */
+  case 83:
+#line 504 "dtc-parser.y" /* yacc.c:1646  */
     {
 			(yyval.node) = name_node(build_node_delete(), (yyvsp[-1].propnodename));
 		}
-#line 2018 "dtc-parser.tab.c" /* yacc.c:1646  */
+#line 2050 "dtc-parser.tab.c" /* yacc.c:1646  */
     break;
 
-  case 80:
-#line 482 "dtc-parser.y" /* yacc.c:1646  */
+  case 84:
+#line 508 "dtc-parser.y" /* yacc.c:1646  */
     {
 			add_label(&(yyvsp[0].node)->labels, (yyvsp[-1].labelref));
 			(yyval.node) = (yyvsp[0].node);
 		}
-#line 2027 "dtc-parser.tab.c" /* yacc.c:1646  */
+#line 2059 "dtc-parser.tab.c" /* yacc.c:1646  */
     break;
 
 
-#line 2031 "dtc-parser.tab.c" /* yacc.c:1646  */
+#line 2063 "dtc-parser.tab.c" /* yacc.c:1646  */
       default: break;
     }
   /* User semantic actions sometimes alter yychar, and that requires
@@ -2262,7 +2294,7 @@ yyreturn:
 #endif
   return yyresult;
 }
-#line 488 "dtc-parser.y" /* yacc.c:1906  */
+#line 514 "dtc-parser.y" /* yacc.c:1906  */
 
 
 void yyerror(char const *s)
diff --git a/scripts/dtc/dtc-parser.tab.h_shipped b/scripts/dtc/dtc-parser.tab.h_shipped
index 30867c6..6aa512c 100644
--- a/scripts/dtc/dtc-parser.tab.h_shipped
+++ b/scripts/dtc/dtc-parser.tab.h_shipped
@@ -1,8 +1,8 @@
-/* A Bison parser, made by GNU Bison 3.0.2.  */
+/* A Bison parser, made by GNU Bison 3.0.4.  */
 
 /* Bison interface for Yacc-like parsers in C
 
-   Copyright (C) 1984, 1989-1990, 2000-2013 Free Software Foundation, Inc.
+   Copyright (C) 1984, 1989-1990, 2000-2015 Free Software Foundation, Inc.
 
    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -46,35 +46,36 @@ extern int yydebug;
   enum yytokentype
   {
     DT_V1 = 258,
-    DT_MEMRESERVE = 259,
-    DT_LSHIFT = 260,
-    DT_RSHIFT = 261,
-    DT_LE = 262,
-    DT_GE = 263,
-    DT_EQ = 264,
-    DT_NE = 265,
-    DT_AND = 266,
-    DT_OR = 267,
-    DT_BITS = 268,
-    DT_DEL_PROP = 269,
-    DT_DEL_NODE = 270,
-    DT_PROPNODENAME = 271,
-    DT_LITERAL = 272,
-    DT_CHAR_LITERAL = 273,
-    DT_BYTE = 274,
-    DT_STRING = 275,
-    DT_LABEL = 276,
-    DT_REF = 277,
-    DT_INCBIN = 278
+    DT_PLUGIN = 259,
+    DT_MEMRESERVE = 260,
+    DT_LSHIFT = 261,
+    DT_RSHIFT = 262,
+    DT_LE = 263,
+    DT_GE = 264,
+    DT_EQ = 265,
+    DT_NE = 266,
+    DT_AND = 267,
+    DT_OR = 268,
+    DT_BITS = 269,
+    DT_DEL_PROP = 270,
+    DT_DEL_NODE = 271,
+    DT_PROPNODENAME = 272,
+    DT_LITERAL = 273,
+    DT_CHAR_LITERAL = 274,
+    DT_BYTE = 275,
+    DT_STRING = 276,
+    DT_LABEL = 277,
+    DT_REF = 278,
+    DT_INCBIN = 279
   };
 #endif
 
 /* Value type.  */
 #if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED
-typedef union YYSTYPE YYSTYPE;
+
 union YYSTYPE
 {
-#line 38 "dtc-parser.y" /* yacc.c:1909  */
+#line 39 "dtc-parser.y" /* yacc.c:1909  */
 
 	char *propnodename;
 	char *labelref;
@@ -92,9 +93,12 @@ union YYSTYPE
 	struct node *nodelist;
 	struct reserve_info *re;
 	uint64_t integer;
+	unsigned int flags;
 
-#line 97 "dtc-parser.tab.h" /* yacc.c:1909  */
+#line 99 "dtc-parser.tab.h" /* yacc.c:1909  */
 };
+
+typedef union YYSTYPE YYSTYPE;
 # define YYSTYPE_IS_TRIVIAL 1
 # define YYSTYPE_IS_DECLARED 1
 #endif
diff --git a/scripts/dtc/dtc-parser.y b/scripts/dtc/dtc-parser.y
index 000873f..ca3f500 100644
--- a/scripts/dtc/dtc-parser.y
+++ b/scripts/dtc/dtc-parser.y
@@ -19,6 +19,7 @@
  */
 %{
 #include <stdio.h>
+#include <inttypes.h>
 
 #include "dtc.h"
 #include "srcpos.h"
@@ -31,7 +32,7 @@ extern void yyerror(char const *s);
 		treesource_error = true; \
 	} while (0)
 
-extern struct boot_info *the_boot_info;
+extern struct dt_info *parser_output;
 extern bool treesource_error;
 %}
 
@@ -52,9 +53,11 @@ extern bool treesource_error;
 	struct node *nodelist;
 	struct reserve_info *re;
 	uint64_t integer;
+	unsigned int flags;
 }
 
 %token DT_V1
+%token DT_PLUGIN
 %token DT_MEMRESERVE
 %token DT_LSHIFT DT_RSHIFT DT_LE DT_GE DT_EQ DT_NE DT_AND DT_OR
 %token DT_BITS
@@ -71,6 +74,8 @@ extern bool treesource_error;
 
 %type <data> propdata
 %type <data> propdataprefix
+%type <flags> header
+%type <flags> headers
 %type <re> memreserve
 %type <re> memreserves
 %type <array> arrayprefix
@@ -101,10 +106,31 @@ extern bool treesource_error;
 %%
 
 sourcefile:
-	  DT_V1 ';' memreserves devicetree
+	  headers memreserves devicetree
 		{
-			the_boot_info = build_boot_info($3, $4,
-							guess_boot_cpuid($4));
+			parser_output = build_dt_info($1, $2, $3,
+			                              guess_boot_cpuid($3));
+		}
+	;
+
+header:
+	  DT_V1 ';'
+		{
+			$$ = DTSF_V1;
+		}
+	| DT_V1 ';' DT_PLUGIN ';'
+		{
+			$$ = DTSF_V1 | DTSF_PLUGIN;
+		}
+	;
+
+headers:
+	  header
+	| header headers
+		{
+			if ($2 != $1)
+				ERROR(&@2, "Header flags don't match earlier ones");
+			$$ = $1;
 		}
 	;
 
@@ -145,10 +171,10 @@ devicetree:
 		{
 			struct node *target = get_node_by_ref($1, $3);
 
-			add_label(&target->labels, $2);
-			if (target)
+			if (target) {
+				add_label(&target->labels, $2);
 				merge_nodes(target, $4);
-			else
+			} else
 				ERROR(&@3, "Label or path %s not found", $3);
 			$$ = $1;
 		}
diff --git a/scripts/dtc/dtc.c b/scripts/dtc/dtc.c
index 5fa23c4..f5eed9d 100644
--- a/scripts/dtc/dtc.c
+++ b/scripts/dtc/dtc.c
@@ -30,7 +30,16 @@ int quiet;		/* Level of quietness */
 int reservenum;		/* Number of memory reservation slots */
 int minsize;		/* Minimum blob size */
 int padsize;		/* Additional padding to blob */
+int alignsize;		/* Additional padding to blob accroding to the alignsize */
 int phandle_format = PHANDLE_BOTH;	/* Use linux,phandle or phandle properties */
+int generate_symbols;	/* enable symbols & fixup support */
+int generate_fixups;		/* suppress generation of fixups on symbol support */
+int auto_label_aliases;		/* auto generate labels -> aliases */
+
+static int is_power_of_2(int x)
+{
+	return (x > 0) && ((x & (x - 1)) == 0);
+}
 
 static void fill_fullpaths(struct node *tree, const char *prefix)
 {
@@ -53,7 +62,7 @@ static void fill_fullpaths(struct node *tree, const char *prefix)
 #define FDT_VERSION(version)	_FDT_VERSION(version)
 #define _FDT_VERSION(version)	#version
 static const char usage_synopsis[] = "dtc [options] <input file>";
-static const char usage_short_opts[] = "qI:O:o:V:d:R:S:p:fb:i:H:sW:E:hv";
+static const char usage_short_opts[] = "qI:O:o:V:d:R:S:p:a:fb:i:H:sW:E:@Ahv";
 static struct option const usage_long_opts[] = {
 	{"quiet",            no_argument, NULL, 'q'},
 	{"in-format",         a_argument, NULL, 'I'},
@@ -64,6 +73,7 @@ static struct option const usage_long_opts[] = {
 	{"reserve",           a_argument, NULL, 'R'},
 	{"space",             a_argument, NULL, 'S'},
 	{"pad",               a_argument, NULL, 'p'},
+	{"align",             a_argument, NULL, 'a'},
 	{"boot-cpu",          a_argument, NULL, 'b'},
 	{"force",            no_argument, NULL, 'f'},
 	{"include",           a_argument, NULL, 'i'},
@@ -71,6 +81,8 @@ static struct option const usage_long_opts[] = {
 	{"phandle",           a_argument, NULL, 'H'},
 	{"warning",           a_argument, NULL, 'W'},
 	{"error",             a_argument, NULL, 'E'},
+	{"symbols",	     no_argument, NULL, '@'},
+	{"auto-alias",       no_argument, NULL, 'A'},
 	{"help",             no_argument, NULL, 'h'},
 	{"version",          no_argument, NULL, 'v'},
 	{NULL,               no_argument, NULL, 0x0},
@@ -91,6 +103,7 @@ static const char * const usage_opts_help[] = {
 	"\n\tMake space for <number> reserve map entries (for dtb and asm output)",
 	"\n\tMake the blob at least <bytes> long (extra space)",
 	"\n\tAdd padding to the blob of <bytes> long (extra space)",
+	"\n\tMake the blob align to the <bytes> (extra space)",
 	"\n\tSet the physical boot cpu",
 	"\n\tTry to produce output even if the input tree has errors",
 	"\n\tAdd a path to search for include files",
@@ -101,6 +114,8 @@ static const char * const usage_opts_help[] = {
 	 "\t\tboth   - Both \"linux,phandle\" and \"phandle\" properties",
 	"\n\tEnable/disable warnings (prefix with \"no-\")",
 	"\n\tEnable/disable errors (prefix with \"no-\")",
+	"\n\tEnable generation of symbols",
+	"\n\tEnable auto-alias of labels",
 	"\n\tPrint this help and exit",
 	"\n\tPrint version and exit",
 	NULL,
@@ -123,7 +138,7 @@ static const char *guess_type_by_name(const char *fname, const char *fallback)
 static const char *guess_input_format(const char *fname, const char *fallback)
 {
 	struct stat statbuf;
-	uint32_t magic;
+	fdt32_t magic;
 	FILE *f;
 
 	if (stat(fname, &statbuf) != 0)
@@ -144,8 +159,7 @@ static const char *guess_input_format(const char *fname, const char *fallback)
 	}
 	fclose(f);
 
-	magic = fdt32_to_cpu(magic);
-	if (magic == FDT_MAGIC)
+	if (fdt32_to_cpu(magic) == FDT_MAGIC)
 		return "dtb";
 
 	return guess_type_by_name(fname, fallback);
@@ -153,7 +167,7 @@ static const char *guess_input_format(const char *fname, const char *fallback)
 
 int main(int argc, char *argv[])
 {
-	struct boot_info *bi;
+	struct dt_info *dti;
 	const char *inform = NULL;
 	const char *outform = NULL;
 	const char *outname = "-";
@@ -169,6 +183,7 @@ int main(int argc, char *argv[])
 	reservenum = 0;
 	minsize    = 0;
 	padsize    = 0;
+	alignsize  = 0;
 
 	while ((opt = util_getopt_long()) != EOF) {
 		switch (opt) {
@@ -196,6 +211,12 @@ int main(int argc, char *argv[])
 		case 'p':
 			padsize = strtol(optarg, NULL, 0);
 			break;
+		case 'a':
+			alignsize = strtol(optarg, NULL, 0);
+			if (!is_power_of_2(alignsize))
+				die("Invalid argument \"%d\" to -a option\n",
+				    alignsize);
+			break;
 		case 'f':
 			force = true;
 			break;
@@ -234,6 +255,13 @@ int main(int argc, char *argv[])
 			parse_checks_option(false, true, optarg);
 			break;
 
+		case '@':
+			generate_symbols = 1;
+			break;
+		case 'A':
+			auto_label_aliases = 1;
+			break;
+
 		case 'h':
 			usage(NULL);
 		default:
@@ -272,27 +300,45 @@ int main(int argc, char *argv[])
 		}
 	}
 	if (streq(inform, "dts"))
-		bi = dt_from_source(arg);
+		dti = dt_from_source(arg);
 	else if (streq(inform, "fs"))
-		bi = dt_from_fs(arg);
+		dti = dt_from_fs(arg);
 	else if(streq(inform, "dtb"))
-		bi = dt_from_blob(arg);
+		dti = dt_from_blob(arg);
 	else
 		die("Unknown input format \"%s\"\n", inform);
 
+	dti->outname = outname;
+
 	if (depfile) {
 		fputc('\n', depfile);
 		fclose(depfile);
 	}
 
 	if (cmdline_boot_cpuid != -1)
-		bi->boot_cpuid_phys = cmdline_boot_cpuid;
+		dti->boot_cpuid_phys = cmdline_boot_cpuid;
+
+	fill_fullpaths(dti->dt, "");
+	process_checks(force, dti);
+
+	/* on a plugin, generate by default */
+	if (dti->dtsflags & DTSF_PLUGIN) {
+		generate_fixups = 1;
+	}
 
-	fill_fullpaths(bi->dt, "");
-	process_checks(force, bi);
+	if (auto_label_aliases)
+		generate_label_tree(dti, "aliases", false);
+
+	if (generate_symbols)
+		generate_label_tree(dti, "__symbols__", true);
+
+	if (generate_fixups) {
+		generate_fixups_tree(dti, "__fixups__");
+		generate_local_fixups_tree(dti, "__local_fixups__");
+	}
 
 	if (sort)
-		sort_tree(bi);
+		sort_tree(dti);
 
 	if (streq(outname, "-")) {
 		outf = stdout;
@@ -304,11 +350,11 @@ int main(int argc, char *argv[])
 	}
 
 	if (streq(outform, "dts")) {
-		dt_to_source(outf, bi);
+		dt_to_source(outf, dti);
 	} else if (streq(outform, "dtb")) {
-		dt_to_blob(outf, bi, outversion);
+		dt_to_blob(outf, dti, outversion);
 	} else if (streq(outform, "asm")) {
-		dt_to_asm(outf, bi, outversion);
+		dt_to_asm(outf, dti, outversion);
 	} else if (streq(outform, "null")) {
 		/* do nothing */
 	} else {
diff --git a/scripts/dtc/dtc.h b/scripts/dtc/dtc.h
index 56212c8..403b79d 100644
--- a/scripts/dtc/dtc.h
+++ b/scripts/dtc/dtc.h
@@ -43,7 +43,6 @@
 #define debug(...)
 #endif
 
-
 #define DEFAULT_FDT_VERSION	17
 
 /*
@@ -53,7 +52,11 @@ extern int quiet;		/* Level of quietness */
 extern int reservenum;		/* Number of memory reservation slots */
 extern int minsize;		/* Minimum blob size */
 extern int padsize;		/* Additional padding to blob */
+extern int alignsize;		/* Additional padding to blob accroding to the alignsize */
 extern int phandle_format;	/* Use linux,phandle or phandle properties */
+extern int generate_symbols;	/* generate symbols for nodes with labels */
+extern int generate_fixups;	/* generate fixups */
+extern int auto_label_aliases;	/* auto generate labels -> aliases */
 
 #define PHANDLE_LEGACY	0x1
 #define PHANDLE_EPAPR	0x2
@@ -110,7 +113,7 @@ struct data data_insert_at_marker(struct data d, struct marker *m,
 struct data data_merge(struct data d1, struct data d2);
 struct data data_append_cell(struct data d, cell_t word);
 struct data data_append_integer(struct data d, uint64_t word, int bits);
-struct data data_append_re(struct data d, const struct fdt_reserve_entry *re);
+struct data data_append_re(struct data d, uint64_t address, uint64_t size);
 struct data data_append_addr(struct data d, uint64_t addr);
 struct data data_append_byte(struct data d, uint8_t byte);
 struct data data_append_zeroes(struct data d, int len);
@@ -201,6 +204,8 @@ void delete_property(struct property *prop);
 void add_child(struct node *parent, struct node *child);
 void delete_node_by_name(struct node *parent, char *name);
 void delete_node(struct node *node);
+void append_to_property(struct node *node,
+			char *name, const void *data, int len);
 
 const char *get_unitname(struct node *node);
 struct property *get_property(struct node *node, const char *propname);
@@ -221,7 +226,7 @@ uint32_t guess_boot_cpuid(struct node *tree);
 /* Boot info (tree plus memreserve information */
 
 struct reserve_info {
-	struct fdt_reserve_entry re;
+	uint64_t address, size;
 
 	struct reserve_info *next;
 
@@ -235,35 +240,45 @@ struct reserve_info *add_reserve_entry(struct reserve_info *list,
 				       struct reserve_info *new);
 
 
-struct boot_info {
+struct dt_info {
+	unsigned int dtsflags;
 	struct reserve_info *reservelist;
-	struct node *dt;		/* the device tree */
 	uint32_t boot_cpuid_phys;
+	struct node *dt;		/* the device tree */
+	const char *outname;		/* filename being written to, "-" for stdout */
 };
 
-struct boot_info *build_boot_info(struct reserve_info *reservelist,
-				  struct node *tree, uint32_t boot_cpuid_phys);
-void sort_tree(struct boot_info *bi);
+/* DTS version flags definitions */
+#define DTSF_V1		0x0001	/* /dts-v1/ */
+#define DTSF_PLUGIN	0x0002	/* /plugin/ */
+
+struct dt_info *build_dt_info(unsigned int dtsflags,
+			      struct reserve_info *reservelist,
+			      struct node *tree, uint32_t boot_cpuid_phys);
+void sort_tree(struct dt_info *dti);
+void generate_label_tree(struct dt_info *dti, char *name, bool allocph);
+void generate_fixups_tree(struct dt_info *dti, char *name);
+void generate_local_fixups_tree(struct dt_info *dti, char *name);
 
 /* Checks */
 
 void parse_checks_option(bool warn, bool error, const char *arg);
-void process_checks(bool force, struct boot_info *bi);
+void process_checks(bool force, struct dt_info *dti);
 
 /* Flattened trees */
 
-void dt_to_blob(FILE *f, struct boot_info *bi, int version);
-void dt_to_asm(FILE *f, struct boot_info *bi, int version);
+void dt_to_blob(FILE *f, struct dt_info *dti, int version);
+void dt_to_asm(FILE *f, struct dt_info *dti, int version);
 
-struct boot_info *dt_from_blob(const char *fname);
+struct dt_info *dt_from_blob(const char *fname);
 
 /* Tree source */
 
-void dt_to_source(FILE *f, struct boot_info *bi);
-struct boot_info *dt_from_source(const char *f);
+void dt_to_source(FILE *f, struct dt_info *dti);
+struct dt_info *dt_from_source(const char *f);
 
 /* FS trees */
 
-struct boot_info *dt_from_fs(const char *dirname);
+struct dt_info *dt_from_fs(const char *dirname);
 
 #endif /* _DTC_H */
diff --git a/scripts/dtc/flattree.c b/scripts/dtc/flattree.c
index ec14954..fcf7154 100644
--- a/scripts/dtc/flattree.c
+++ b/scripts/dtc/flattree.c
@@ -49,7 +49,7 @@ static struct version_info {
 
 struct emitter {
 	void (*cell)(void *, cell_t);
-	void (*string)(void *, char *, int);
+	void (*string)(void *, const char *, int);
 	void (*align)(void *, int);
 	void (*data)(void *, struct data);
 	void (*beginnode)(void *, struct label *labels);
@@ -64,7 +64,7 @@ static void bin_emit_cell(void *e, cell_t val)
 	*dtbuf = data_append_cell(*dtbuf, val);
 }
 
-static void bin_emit_string(void *e, char *str, int len)
+static void bin_emit_string(void *e, const char *str, int len)
 {
 	struct data *dtbuf = e;
 
@@ -144,22 +144,14 @@ static void asm_emit_cell(void *e, cell_t val)
 		(val >> 8) & 0xff, val & 0xff);
 }
 
-static void asm_emit_string(void *e, char *str, int len)
+static void asm_emit_string(void *e, const char *str, int len)
 {
 	FILE *f = e;
-	char c = 0;
 
-	if (len != 0) {
-		/* XXX: ewww */
-		c = str[len];
-		str[len] = '\0';
-	}
-
-	fprintf(f, "\t.string\t\"%s\"\n", str);
-
-	if (len != 0) {
-		str[len] = c;
-	}
+	if (len != 0)
+		fprintf(f, "\t.string\t\"%.*s\"\n", len, str);
+	else
+		fprintf(f, "\t.string\t\"%s\"\n", str);
 }
 
 static void asm_emit_align(void *e, int a)
@@ -179,7 +171,7 @@ static void asm_emit_data(void *e, struct data d)
 		emit_offset_label(f, m->ref, m->offset);
 
 	while ((d.len - off) >= sizeof(uint32_t)) {
-		asm_emit_cell(e, fdt32_to_cpu(*((uint32_t *)(d.val+off))));
+		asm_emit_cell(e, fdt32_to_cpu(*((fdt32_t *)(d.val+off))));
 		off += sizeof(uint32_t);
 	}
 
@@ -318,17 +310,16 @@ static struct data flatten_reserve_list(struct reserve_info *reservelist,
 {
 	struct reserve_info *re;
 	struct data d = empty_data;
-	static struct fdt_reserve_entry null_re = {0,0};
 	int    j;
 
 	for (re = reservelist; re; re = re->next) {
-		d = data_append_re(d, &re->re);
+		d = data_append_re(d, re->address, re->size);
 	}
 	/*
 	 * Add additional reserved slots if the user asked for them.
 	 */
 	for (j = 0; j < reservenum; j++) {
-		d = data_append_re(d, &null_re);
+		d = data_append_re(d, 0, 0);
 	}
 
 	return d;
@@ -366,7 +357,7 @@ static void make_fdt_header(struct fdt_header *fdt,
 		fdt->size_dt_struct = cpu_to_fdt32(dtsize);
 }
 
-void dt_to_blob(FILE *f, struct boot_info *bi, int version)
+void dt_to_blob(FILE *f, struct dt_info *dti, int version)
 {
 	struct version_info *vi = NULL;
 	int i;
@@ -384,29 +375,36 @@ void dt_to_blob(FILE *f, struct boot_info *bi, int version)
 	if (!vi)
 		die("Unknown device tree blob version %d\n", version);
 
-	flatten_tree(bi->dt, &bin_emitter, &dtbuf, &strbuf, vi);
+	flatten_tree(dti->dt, &bin_emitter, &dtbuf, &strbuf, vi);
 	bin_emit_cell(&dtbuf, FDT_END);
 
-	reservebuf = flatten_reserve_list(bi->reservelist, vi);
+	reservebuf = flatten_reserve_list(dti->reservelist, vi);
 
 	/* Make header */
 	make_fdt_header(&fdt, vi, reservebuf.len, dtbuf.len, strbuf.len,
-			bi->boot_cpuid_phys);
+			dti->boot_cpuid_phys);
 
 	/*
 	 * If the user asked for more space than is used, adjust the totalsize.
 	 */
 	if (minsize > 0) {
 		padlen = minsize - fdt32_to_cpu(fdt.totalsize);
-		if ((padlen < 0) && (quiet < 1))
-			fprintf(stderr,
-				"Warning: blob size %d >= minimum size %d\n",
-				fdt32_to_cpu(fdt.totalsize), minsize);
+		if (padlen < 0) {
+			padlen = 0;
+			if (quiet < 1)
+				fprintf(stderr,
+					"Warning: blob size %d >= minimum size %d\n",
+					fdt32_to_cpu(fdt.totalsize), minsize);
+		}
 	}
 
 	if (padsize > 0)
 		padlen = padsize;
 
+	if (alignsize > 0)
+		padlen = ALIGN(fdt32_to_cpu(fdt.totalsize) + padlen, alignsize)
+			- fdt32_to_cpu(fdt.totalsize);
+
 	if (padlen > 0) {
 		int tsize = fdt32_to_cpu(fdt.totalsize);
 		tsize += padlen;
@@ -460,7 +458,7 @@ static void dump_stringtable_asm(FILE *f, struct data strbuf)
 	}
 }
 
-void dt_to_asm(FILE *f, struct boot_info *bi, int version)
+void dt_to_asm(FILE *f, struct dt_info *dti, int version)
 {
 	struct version_info *vi = NULL;
 	int i;
@@ -500,7 +498,7 @@ void dt_to_asm(FILE *f, struct boot_info *bi, int version)
 
 	if (vi->flags & FTF_BOOTCPUID) {
 		fprintf(f, "\t/* boot_cpuid_phys */\n");
-		asm_emit_cell(f, bi->boot_cpuid_phys);
+		asm_emit_cell(f, dti->boot_cpuid_phys);
 	}
 
 	if (vi->flags & FTF_STRTABSIZE) {
@@ -530,18 +528,18 @@ void dt_to_asm(FILE *f, struct boot_info *bi, int version)
 	 * Use .long on high and low halfs of u64s to avoid .quad
 	 * as it appears .quad isn't available in some assemblers.
 	 */
-	for (re = bi->reservelist; re; re = re->next) {
+	for (re = dti->reservelist; re; re = re->next) {
 		struct label *l;
 
 		for_each_label(re->labels, l) {
 			fprintf(f, "\t.globl\t%s\n", l->label);
 			fprintf(f, "%s:\n", l->label);
 		}
-		ASM_EMIT_BELONG(f, "0x%08x", (unsigned int)(re->re.address >> 32));
+		ASM_EMIT_BELONG(f, "0x%08x", (unsigned int)(re->address >> 32));
 		ASM_EMIT_BELONG(f, "0x%08x",
-				(unsigned int)(re->re.address & 0xffffffff));
-		ASM_EMIT_BELONG(f, "0x%08x", (unsigned int)(re->re.size >> 32));
-		ASM_EMIT_BELONG(f, "0x%08x", (unsigned int)(re->re.size & 0xffffffff));
+				(unsigned int)(re->address & 0xffffffff));
+		ASM_EMIT_BELONG(f, "0x%08x", (unsigned int)(re->size >> 32));
+		ASM_EMIT_BELONG(f, "0x%08x", (unsigned int)(re->size & 0xffffffff));
 	}
 	for (i = 0; i < reservenum; i++) {
 		fprintf(f, "\t.long\t0, 0\n\t.long\t0, 0\n");
@@ -550,7 +548,7 @@ void dt_to_asm(FILE *f, struct boot_info *bi, int version)
 	fprintf(f, "\t.long\t0, 0\n\t.long\t0, 0\n");
 
 	emit_label(f, symprefix, "struct_start");
-	flatten_tree(bi->dt, &asm_emitter, f, &strbuf, vi);
+	flatten_tree(dti->dt, &asm_emitter, f, &strbuf, vi);
 
 	fprintf(f, "\t/* FDT_END */\n");
 	asm_emit_cell(f, FDT_END);
@@ -572,6 +570,8 @@ void dt_to_asm(FILE *f, struct boot_info *bi, int version)
 	if (padsize > 0) {
 		fprintf(f, "\t.space\t%d, 0\n", padsize);
 	}
+	if (alignsize > 0)
+		asm_emit_align(f, alignsize);
 	emit_label(f, symprefix, "blob_abs_end");
 
 	data_free(strbuf);
@@ -600,7 +600,7 @@ static void flat_read_chunk(struct inbuf *inb, void *p, int len)
 
 static uint32_t flat_read_word(struct inbuf *inb)
 {
-	uint32_t val;
+	fdt32_t val;
 
 	assert(((inb->ptr - inb->base) % sizeof(val)) == 0);
 
@@ -709,13 +709,15 @@ static struct reserve_info *flat_read_mem_reserve(struct inbuf *inb)
 	 * First pass, count entries.
 	 */
 	while (1) {
+		uint64_t address, size;
+
 		flat_read_chunk(inb, &re, sizeof(re));
-		re.address  = fdt64_to_cpu(re.address);
-		re.size = fdt64_to_cpu(re.size);
-		if (re.size == 0)
+		address  = fdt64_to_cpu(re.address);
+		size = fdt64_to_cpu(re.size);
+		if (size == 0)
 			break;
 
-		new = build_reserve_entry(re.address, re.size);
+		new = build_reserve_entry(address, size);
 		reservelist = add_reserve_entry(reservelist, new);
 	}
 
@@ -797,13 +799,18 @@ static struct node *unflatten_tree(struct inbuf *dtbuf,
 		}
 	} while (val != FDT_END_NODE);
 
+	if (node->name != flatname) {
+		free(flatname);
+	}
+
 	return node;
 }
 
 
-struct boot_info *dt_from_blob(const char *fname)
+struct dt_info *dt_from_blob(const char *fname)
 {
 	FILE *f;
+	fdt32_t magic_buf, totalsize_buf;
 	uint32_t magic, totalsize, version, size_dt, boot_cpuid_phys;
 	uint32_t off_dt, off_str, off_mem_rsvmap;
 	int rc;
@@ -820,7 +827,7 @@ struct boot_info *dt_from_blob(const char *fname)
 
 	f = srcfile_relative_open(fname, NULL);
 
-	rc = fread(&magic, sizeof(magic), 1, f);
+	rc = fread(&magic_buf, sizeof(magic_buf), 1, f);
 	if (ferror(f))
 		die("Error reading DT blob magic number: %s\n",
 		    strerror(errno));
@@ -831,11 +838,11 @@ struct boot_info *dt_from_blob(const char *fname)
 			die("Mysterious short read reading magic number\n");
 	}
 
-	magic = fdt32_to_cpu(magic);
+	magic = fdt32_to_cpu(magic_buf);
 	if (magic != FDT_MAGIC)
 		die("Blob has incorrect magic number\n");
 
-	rc = fread(&totalsize, sizeof(totalsize), 1, f);
+	rc = fread(&totalsize_buf, sizeof(totalsize_buf), 1, f);
 	if (ferror(f))
 		die("Error reading DT blob size: %s\n", strerror(errno));
 	if (rc < 1) {
@@ -845,7 +852,7 @@ struct boot_info *dt_from_blob(const char *fname)
 			die("Mysterious short read reading blob size\n");
 	}
 
-	totalsize = fdt32_to_cpu(totalsize);
+	totalsize = fdt32_to_cpu(totalsize_buf);
 	if (totalsize < FDT_V1_SIZE)
 		die("DT blob size (%d) is too small\n", totalsize);
 
@@ -929,5 +936,5 @@ struct boot_info *dt_from_blob(const char *fname)
 
 	fclose(f);
 
-	return build_boot_info(reservelist, tree, boot_cpuid_phys);
+	return build_dt_info(DTSF_V1, reservelist, tree, boot_cpuid_phys);
 }
diff --git a/scripts/dtc/fstree.c b/scripts/dtc/fstree.c
index 6d1beec..ae7d06c 100644
--- a/scripts/dtc/fstree.c
+++ b/scripts/dtc/fstree.c
@@ -79,13 +79,12 @@ static struct node *read_fstree(const char *dirname)
 	return tree;
 }
 
-struct boot_info *dt_from_fs(const char *dirname)
+struct dt_info *dt_from_fs(const char *dirname)
 {
 	struct node *tree;
 
 	tree = read_fstree(dirname);
 	tree = name_node(tree, "");
 
-	return build_boot_info(NULL, tree, guess_boot_cpuid(tree));
+	return build_dt_info(DTSF_V1, NULL, tree, guess_boot_cpuid(tree));
 }
-
diff --git a/scripts/dtc/libfdt/Makefile.libfdt b/scripts/dtc/libfdt/Makefile.libfdt
index 09c322e..098b3f3 100644
--- a/scripts/dtc/libfdt/Makefile.libfdt
+++ b/scripts/dtc/libfdt/Makefile.libfdt
@@ -7,5 +7,5 @@ LIBFDT_soname = libfdt.$(SHAREDLIB_EXT).1
 LIBFDT_INCLUDES = fdt.h libfdt.h libfdt_env.h
 LIBFDT_VERSION = version.lds
 LIBFDT_SRCS = fdt.c fdt_ro.c fdt_wip.c fdt_sw.c fdt_rw.c fdt_strerror.c fdt_empty_tree.c \
-	fdt_addresses.c
+	fdt_addresses.c fdt_overlay.c
 LIBFDT_OBJS = $(LIBFDT_SRCS:%.c=%.o)
diff --git a/scripts/dtc/libfdt/fdt_ro.c b/scripts/dtc/libfdt/fdt_ro.c
index 50cce86..3d00d2e 100644
--- a/scripts/dtc/libfdt/fdt_ro.c
+++ b/scripts/dtc/libfdt/fdt_ro.c
@@ -88,6 +88,32 @@ static int _fdt_string_eq(const void *fdt, int stroffset,
 	return (strlen(p) == len) && (memcmp(p, s, len) == 0);
 }
 
+uint32_t fdt_get_max_phandle(const void *fdt)
+{
+	uint32_t max_phandle = 0;
+	int offset;
+
+	for (offset = fdt_next_node(fdt, -1, NULL);;
+	     offset = fdt_next_node(fdt, offset, NULL)) {
+		uint32_t phandle;
+
+		if (offset == -FDT_ERR_NOTFOUND)
+			return max_phandle;
+
+		if (offset < 0)
+			return (uint32_t)-1;
+
+		phandle = fdt_get_phandle(fdt, offset);
+		if (phandle == (uint32_t)-1)
+			continue;
+
+		if (phandle > max_phandle)
+			max_phandle = phandle;
+	}
+
+	return 0;
+}
+
 int fdt_get_mem_rsv(const void *fdt, int n, uint64_t *address, uint64_t *size)
 {
 	FDT_CHECK_HEADER(fdt);
@@ -545,7 +571,7 @@ int fdt_stringlist_count(const void *fdt, int nodeoffset, const char *property)
 
 	list = fdt_getprop(fdt, nodeoffset, property, &length);
 	if (!list)
-		return -length;
+		return length;
 
 	end = list + length;
 
@@ -571,7 +597,7 @@ int fdt_stringlist_search(const void *fdt, int nodeoffset, const char *property,
 
 	list = fdt_getprop(fdt, nodeoffset, property, &length);
 	if (!list)
-		return -length;
+		return length;
 
 	len = strlen(string) + 1;
 	end = list + length;
diff --git a/scripts/dtc/libfdt/fdt_rw.c b/scripts/dtc/libfdt/fdt_rw.c
index 8be02b1..3fd5847 100644
--- a/scripts/dtc/libfdt/fdt_rw.c
+++ b/scripts/dtc/libfdt/fdt_rw.c
@@ -191,17 +191,13 @@ int fdt_add_mem_rsv(void *fdt, uint64_t address, uint64_t size)
 int fdt_del_mem_rsv(void *fdt, int n)
 {
 	struct fdt_reserve_entry *re = _fdt_mem_rsv_w(fdt, n);
-	int err;
 
 	FDT_RW_CHECK_HEADER(fdt);
 
 	if (n >= fdt_num_mem_rsv(fdt))
 		return -FDT_ERR_NOTFOUND;
 
-	err = _fdt_splice_mem_rsv(fdt, re, 1, 0);
-	if (err)
-		return err;
-	return 0;
+	return _fdt_splice_mem_rsv(fdt, re, 1, 0);
 }
 
 static int _fdt_resize_property(void *fdt, int nodeoffset, const char *name,
@@ -287,7 +283,8 @@ int fdt_setprop(void *fdt, int nodeoffset, const char *name,
 	if (err)
 		return err;
 
-	memcpy(prop->data, val, len);
+	if (len)
+		memcpy(prop->data, val, len);
 	return 0;
 }
 
diff --git a/scripts/dtc/libfdt/fdt_strerror.c b/scripts/dtc/libfdt/fdt_strerror.c
index e6c3cee..9677a18 100644
--- a/scripts/dtc/libfdt/fdt_strerror.c
+++ b/scripts/dtc/libfdt/fdt_strerror.c
@@ -69,6 +69,7 @@ static struct fdt_errtabent fdt_errtable[] = {
 
 	FDT_ERRTABENT(FDT_ERR_BADOFFSET),
 	FDT_ERRTABENT(FDT_ERR_BADPATH),
+	FDT_ERRTABENT(FDT_ERR_BADPHANDLE),
 	FDT_ERRTABENT(FDT_ERR_BADSTATE),
 
 	FDT_ERRTABENT(FDT_ERR_TRUNCATED),
@@ -76,6 +77,11 @@ static struct fdt_errtabent fdt_errtable[] = {
 	FDT_ERRTABENT(FDT_ERR_BADVERSION),
 	FDT_ERRTABENT(FDT_ERR_BADSTRUCTURE),
 	FDT_ERRTABENT(FDT_ERR_BADLAYOUT),
+	FDT_ERRTABENT(FDT_ERR_INTERNAL),
+	FDT_ERRTABENT(FDT_ERR_BADNCELLS),
+	FDT_ERRTABENT(FDT_ERR_BADVALUE),
+	FDT_ERRTABENT(FDT_ERR_BADOVERLAY),
+	FDT_ERRTABENT(FDT_ERR_NOPHANDLES),
 };
 #define FDT_ERRTABSIZE	(sizeof(fdt_errtable) / sizeof(fdt_errtable[0]))
 
diff --git a/scripts/dtc/libfdt/fdt_wip.c b/scripts/dtc/libfdt/fdt_wip.c
index c5bbb68..6aaab39 100644
--- a/scripts/dtc/libfdt/fdt_wip.c
+++ b/scripts/dtc/libfdt/fdt_wip.c
@@ -55,21 +55,42 @@
 
 #include "libfdt_internal.h"
 
+int fdt_setprop_inplace_namelen_partial(void *fdt, int nodeoffset,
+					const char *name, int namelen,
+					uint32_t idx, const void *val,
+					int len)
+{
+	void *propval;
+	int proplen;
+
+	propval = fdt_getprop_namelen_w(fdt, nodeoffset, name, namelen,
+					&proplen);
+	if (!propval)
+		return proplen;
+
+	if (proplen < (len + idx))
+		return -FDT_ERR_NOSPACE;
+
+	memcpy((char *)propval + idx, val, len);
+	return 0;
+}
+
 int fdt_setprop_inplace(void *fdt, int nodeoffset, const char *name,
 			const void *val, int len)
 {
-	void *propval;
+	const void *propval;
 	int proplen;
 
-	propval = fdt_getprop_w(fdt, nodeoffset, name, &proplen);
+	propval = fdt_getprop(fdt, nodeoffset, name, &proplen);
 	if (! propval)
 		return proplen;
 
 	if (proplen != len)
 		return -FDT_ERR_NOSPACE;
 
-	memcpy(propval, val, len);
-	return 0;
+	return fdt_setprop_inplace_namelen_partial(fdt, nodeoffset, name,
+						   strlen(name), 0,
+						   val, len);
 }
 
 static void _fdt_nop_region(void *start, int len)
diff --git a/scripts/dtc/libfdt/libfdt.h b/scripts/dtc/libfdt/libfdt.h
index 59ca339..9e71bb9 100644
--- a/scripts/dtc/libfdt/libfdt.h
+++ b/scripts/dtc/libfdt/libfdt.h
@@ -61,7 +61,7 @@
 #define FDT_ERR_NOTFOUND	1
 	/* FDT_ERR_NOTFOUND: The requested node or property does not exist */
 #define FDT_ERR_EXISTS		2
-	/* FDT_ERR_EXISTS: Attemped to create a node or property which
+	/* FDT_ERR_EXISTS: Attempted to create a node or property which
 	 * already exists */
 #define FDT_ERR_NOSPACE		3
 	/* FDT_ERR_NOSPACE: Operation needed to expand the device
@@ -79,8 +79,10 @@
 	 * (e.g. missing a leading / for a function which requires an
 	 * absolute path) */
 #define FDT_ERR_BADPHANDLE	6
-	/* FDT_ERR_BADPHANDLE: Function was passed an invalid phandle
-	 * value.  phandle values of 0 and -1 are not permitted. */
+	/* FDT_ERR_BADPHANDLE: Function was passed an invalid phandle.
+	 * This can be caused either by an invalid phandle property
+	 * length, or the phandle value was either 0 or -1, which are
+	 * not permitted. */
 #define FDT_ERR_BADSTATE	7
 	/* FDT_ERR_BADSTATE: Function was passed an incomplete device
 	 * tree created by the sequential-write functions, which is
@@ -126,7 +128,16 @@
 	 * value. For example: a property expected to contain a string list
 	 * is not NUL-terminated within the length of its value. */
 
-#define FDT_ERR_MAX		15
+#define FDT_ERR_BADOVERLAY	16
+	/* FDT_ERR_BADOVERLAY: The device tree overlay, while
+	 * correctly structured, cannot be applied due to some
+	 * unexpected or missing value, property or node. */
+
+#define FDT_ERR_NOPHANDLES	17
+	/* FDT_ERR_NOPHANDLES: The device tree doesn't have any
+	 * phandle available anymore without causing an overflow */
+
+#define FDT_ERR_MAX		17
 
 /**********************************************************************/
 /* Low-level functions (you probably don't need these)                */
@@ -168,27 +179,55 @@ int fdt_first_subnode(const void *fdt, int offset);
  */
 int fdt_next_subnode(const void *fdt, int offset);
 
+/**
+ * fdt_for_each_subnode - iterate over all subnodes of a parent
+ *
+ * @node:	child node (int, lvalue)
+ * @fdt:	FDT blob (const void *)
+ * @parent:	parent node (int)
+ *
+ * This is actually a wrapper around a for loop and would be used like so:
+ *
+ *	fdt_for_each_subnode(node, fdt, parent) {
+ *		Use node
+ *		...
+ *	}
+ *
+ *	if ((node < 0) && (node != -FDT_ERR_NOT_FOUND)) {
+ *		Error handling
+ *	}
+ *
+ * Note that this is implemented as a macro and @node is used as
+ * iterator in the loop. The parent variable be constant or even a
+ * literal.
+ *
+ */
+#define fdt_for_each_subnode(node, fdt, parent)		\
+	for (node = fdt_first_subnode(fdt, parent);	\
+	     node >= 0;					\
+	     node = fdt_next_subnode(fdt, node))
+
 /**********************************************************************/
 /* General functions                                                  */
 /**********************************************************************/
 
 #define fdt_get_header(fdt, field) \
 	(fdt32_to_cpu(((const struct fdt_header *)(fdt))->field))
-#define fdt_magic(fdt) 			(fdt_get_header(fdt, magic))
+#define fdt_magic(fdt)			(fdt_get_header(fdt, magic))
 #define fdt_totalsize(fdt)		(fdt_get_header(fdt, totalsize))
 #define fdt_off_dt_struct(fdt)		(fdt_get_header(fdt, off_dt_struct))
 #define fdt_off_dt_strings(fdt)		(fdt_get_header(fdt, off_dt_strings))
 #define fdt_off_mem_rsvmap(fdt)		(fdt_get_header(fdt, off_mem_rsvmap))
 #define fdt_version(fdt)		(fdt_get_header(fdt, version))
-#define fdt_last_comp_version(fdt) 	(fdt_get_header(fdt, last_comp_version))
-#define fdt_boot_cpuid_phys(fdt) 	(fdt_get_header(fdt, boot_cpuid_phys))
-#define fdt_size_dt_strings(fdt) 	(fdt_get_header(fdt, size_dt_strings))
+#define fdt_last_comp_version(fdt)	(fdt_get_header(fdt, last_comp_version))
+#define fdt_boot_cpuid_phys(fdt)	(fdt_get_header(fdt, boot_cpuid_phys))
+#define fdt_size_dt_strings(fdt)	(fdt_get_header(fdt, size_dt_strings))
 #define fdt_size_dt_struct(fdt)		(fdt_get_header(fdt, size_dt_struct))
 
 #define __fdt_set_hdr(name) \
 	static inline void fdt_set_##name(void *fdt, uint32_t val) \
 	{ \
-		struct fdt_header *fdth = (struct fdt_header*)fdt; \
+		struct fdt_header *fdth = (struct fdt_header *)fdt; \
 		fdth->name = cpu_to_fdt32(val); \
 	}
 __fdt_set_hdr(magic);
@@ -258,6 +297,21 @@ int fdt_move(const void *fdt, void *buf, int bufsize);
  */
 const char *fdt_string(const void *fdt, int stroffset);
 
+/**
+ * fdt_get_max_phandle - retrieves the highest phandle in a tree
+ * @fdt: pointer to the device tree blob
+ *
+ * fdt_get_max_phandle retrieves the highest phandle in the given
+ * device tree. This will ignore badly formatted phandles, or phandles
+ * with a value of 0 or -1.
+ *
+ * returns:
+ *      the highest phandle on success
+ *      0, if no phandle was found in the device tree
+ *      -1, if an error occurred
+ */
+uint32_t fdt_get_max_phandle(const void *fdt);
+
 /**
  * fdt_num_mem_rsv - retrieve the number of memory reserve map entries
  * @fdt: pointer to the device tree blob
@@ -318,8 +372,9 @@ int fdt_subnode_offset_namelen(const void *fdt, int parentoffset,
  * returns:
  *	structure block offset of the requested subnode (>=0), on success
  *	-FDT_ERR_NOTFOUND, if the requested subnode does not exist
- *	-FDT_ERR_BADOFFSET, if parentoffset did not point to an FDT_BEGIN_NODE tag
- *      -FDT_ERR_BADMAGIC,
+ *	-FDT_ERR_BADOFFSET, if parentoffset did not point to an FDT_BEGIN_NODE
+ *		tag
+ *	-FDT_ERR_BADMAGIC,
  *	-FDT_ERR_BADVERSION,
  *	-FDT_ERR_BADSTATE,
  *	-FDT_ERR_BADSTRUCTURE,
@@ -351,7 +406,8 @@ int fdt_path_offset_namelen(const void *fdt, const char *path, int namelen);
  * address).
  *
  * returns:
- *	structure block offset of the node with the requested path (>=0), on success
+ *	structure block offset of the node with the requested path (>=0), on
+ *		success
  *	-FDT_ERR_BADPATH, given path does not begin with '/' or is invalid
  *	-FDT_ERR_NOTFOUND, if the requested node does not exist
  *      -FDT_ERR_BADMAGIC,
@@ -375,10 +431,12 @@ int fdt_path_offset(const void *fdt, const char *path);
  *
  * returns:
  *	pointer to the node's name, on success
- *		If lenp is non-NULL, *lenp contains the length of that name (>=0)
+ *		If lenp is non-NULL, *lenp contains the length of that name
+ *			(>=0)
  *	NULL, on error
  *		if lenp is non-NULL *lenp contains an error code (<0):
- *		-FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag
+ *		-FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE
+ *			tag
  *		-FDT_ERR_BADMAGIC,
  *		-FDT_ERR_BADVERSION,
  *		-FDT_ERR_BADSTATE, standard meanings
@@ -426,6 +484,33 @@ int fdt_first_property_offset(const void *fdt, int nodeoffset);
  */
 int fdt_next_property_offset(const void *fdt, int offset);
 
+/**
+ * fdt_for_each_property_offset - iterate over all properties of a node
+ *
+ * @property_offset:	property offset (int, lvalue)
+ * @fdt:		FDT blob (const void *)
+ * @node:		node offset (int)
+ *
+ * This is actually a wrapper around a for loop and would be used like so:
+ *
+ *	fdt_for_each_property_offset(property, fdt, node) {
+ *		Use property
+ *		...
+ *	}
+ *
+ *	if ((property < 0) && (property != -FDT_ERR_NOT_FOUND)) {
+ *		Error handling
+ *	}
+ *
+ * Note that this is implemented as a macro and property is used as
+ * iterator in the loop. The node variable can be constant or even a
+ * literal.
+ */
+#define fdt_for_each_property_offset(property, fdt, node)	\
+	for (property = fdt_first_property_offset(fdt, node);	\
+	     property >= 0;					\
+	     property = fdt_next_property_offset(fdt, property))
+
 /**
  * fdt_get_property_by_offset - retrieve the property at a given offset
  * @fdt: pointer to the device tree blob
@@ -490,7 +575,8 @@ const struct fdt_property *fdt_get_property_namelen(const void *fdt,
  *	NULL, on error
  *		if lenp is non-NULL, *lenp contains an error code (<0):
  *		-FDT_ERR_NOTFOUND, node does not have named property
- *		-FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag
+ *		-FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE
+ *			tag
  *		-FDT_ERR_BADMAGIC,
  *		-FDT_ERR_BADVERSION,
  *		-FDT_ERR_BADSTATE,
@@ -554,6 +640,13 @@ const void *fdt_getprop_by_offset(const void *fdt, int offset,
  */
 const void *fdt_getprop_namelen(const void *fdt, int nodeoffset,
 				const char *name, int namelen, int *lenp);
+static inline void *fdt_getprop_namelen_w(void *fdt, int nodeoffset,
+					  const char *name, int namelen,
+					  int *lenp)
+{
+	return (void *)(uintptr_t)fdt_getprop_namelen(fdt, nodeoffset, name,
+						      namelen, lenp);
+}
 
 /**
  * fdt_getprop - retrieve the value of a given property
@@ -575,7 +668,8 @@ const void *fdt_getprop_namelen(const void *fdt, int nodeoffset,
  *	NULL, on error
  *		if lenp is non-NULL, *lenp contains an error code (<0):
  *		-FDT_ERR_NOTFOUND, node does not have named property
- *		-FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag
+ *		-FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE
+ *			tag
  *		-FDT_ERR_BADMAGIC,
  *		-FDT_ERR_BADVERSION,
  *		-FDT_ERR_BADSTATE,
@@ -617,7 +711,7 @@ const char *fdt_get_alias_namelen(const void *fdt,
 				  const char *name, int namelen);
 
 /**
- * fdt_get_alias - retreive the path referenced by a given alias
+ * fdt_get_alias - retrieve the path referenced by a given alias
  * @fdt: pointer to the device tree blob
  * @name: name of the alias th look up
  *
@@ -647,7 +741,7 @@ const char *fdt_get_alias(const void *fdt, const char *name);
  *	0, on success
  *		buf contains the absolute path of the node at
  *		nodeoffset, as a NUL-terminated string.
- * 	-FDT_ERR_BADOFFSET, nodeoffset does not refer to a BEGIN_NODE tag
+ *	-FDT_ERR_BADOFFSET, nodeoffset does not refer to a BEGIN_NODE tag
  *	-FDT_ERR_NOSPACE, the path of the given node is longer than (bufsize-1)
  *		characters and will not fit in the given buffer.
  *	-FDT_ERR_BADMAGIC,
@@ -677,11 +771,11 @@ int fdt_get_path(const void *fdt, int nodeoffset, char *buf, int buflen);
  * structure from the start to nodeoffset.
  *
  * returns:
-
  *	structure block offset of the node at node offset's ancestor
  *		of depth supernodedepth (>=0), on success
- * 	-FDT_ERR_BADOFFSET, nodeoffset does not refer to a BEGIN_NODE tag
-*	-FDT_ERR_NOTFOUND, supernodedepth was greater than the depth of nodeoffset
+ *	-FDT_ERR_BADOFFSET, nodeoffset does not refer to a BEGIN_NODE tag
+ *	-FDT_ERR_NOTFOUND, supernodedepth was greater than the depth of
+ *		nodeoffset
  *	-FDT_ERR_BADMAGIC,
  *	-FDT_ERR_BADVERSION,
  *	-FDT_ERR_BADSTATE,
@@ -703,7 +797,7 @@ int fdt_supernode_atdepth_offset(const void *fdt, int nodeoffset,
  *
  * returns:
  *	depth of the node at nodeoffset (>=0), on success
- * 	-FDT_ERR_BADOFFSET, nodeoffset does not refer to a BEGIN_NODE tag
+ *	-FDT_ERR_BADOFFSET, nodeoffset does not refer to a BEGIN_NODE tag
  *	-FDT_ERR_BADMAGIC,
  *	-FDT_ERR_BADVERSION,
  *	-FDT_ERR_BADSTATE,
@@ -726,7 +820,7 @@ int fdt_node_depth(const void *fdt, int nodeoffset);
  * returns:
  *	structure block offset of the parent of the node at nodeoffset
  *		(>=0), on success
- * 	-FDT_ERR_BADOFFSET, nodeoffset does not refer to a BEGIN_NODE tag
+ *	-FDT_ERR_BADOFFSET, nodeoffset does not refer to a BEGIN_NODE tag
  *	-FDT_ERR_BADMAGIC,
  *	-FDT_ERR_BADVERSION,
  *	-FDT_ERR_BADSTATE,
@@ -766,7 +860,7 @@ int fdt_parent_offset(const void *fdt, int nodeoffset);
  *		 on success
  *	-FDT_ERR_NOTFOUND, no node matching the criterion exists in the
  *		tree after startoffset
- * 	-FDT_ERR_BADOFFSET, nodeoffset does not refer to a BEGIN_NODE tag
+ *	-FDT_ERR_BADOFFSET, nodeoffset does not refer to a BEGIN_NODE tag
  *	-FDT_ERR_BADMAGIC,
  *	-FDT_ERR_BADVERSION,
  *	-FDT_ERR_BADSTATE,
@@ -813,7 +907,7 @@ int fdt_node_offset_by_phandle(const void *fdt, uint32_t phandle);
  *	1, if the node has a 'compatible' property, but it does not list
  *		the given string
  *	-FDT_ERR_NOTFOUND, if the given node has no 'compatible' property
- * 	-FDT_ERR_BADOFFSET, if nodeoffset does not refer to a BEGIN_NODE tag
+ *	-FDT_ERR_BADOFFSET, if nodeoffset does not refer to a BEGIN_NODE tag
  *	-FDT_ERR_BADMAGIC,
  *	-FDT_ERR_BADVERSION,
  *	-FDT_ERR_BADSTATE,
@@ -850,7 +944,7 @@ int fdt_node_check_compatible(const void *fdt, int nodeoffset,
  *		 on success
  *	-FDT_ERR_NOTFOUND, no node matching the criterion exists in the
  *		tree after startoffset
- * 	-FDT_ERR_BADOFFSET, nodeoffset does not refer to a BEGIN_NODE tag
+ *	-FDT_ERR_BADOFFSET, nodeoffset does not refer to a BEGIN_NODE tag
  *	-FDT_ERR_BADMAGIC,
  *	-FDT_ERR_BADVERSION,
  *	-FDT_ERR_BADSTATE,
@@ -960,7 +1054,8 @@ const char *fdt_stringlist_get(const void *fdt, int nodeoffset,
  * returns:
  *	0 <= n < FDT_MAX_NCELLS, on success
  *      2, if the node has no #address-cells property
- *      -FDT_ERR_BADNCELLS, if the node has a badly formatted or invalid #address-cells property
+ *      -FDT_ERR_BADNCELLS, if the node has a badly formatted or invalid
+ *		#address-cells property
  *	-FDT_ERR_BADMAGIC,
  *	-FDT_ERR_BADVERSION,
  *	-FDT_ERR_BADSTATE,
@@ -980,7 +1075,8 @@ int fdt_address_cells(const void *fdt, int nodeoffset);
  * returns:
  *	0 <= n < FDT_MAX_NCELLS, on success
  *      2, if the node has no #address-cells property
- *      -FDT_ERR_BADNCELLS, if the node has a badly formatted or invalid #size-cells property
+ *      -FDT_ERR_BADNCELLS, if the node has a badly formatted or invalid
+ *		#size-cells property
  *	-FDT_ERR_BADMAGIC,
  *	-FDT_ERR_BADVERSION,
  *	-FDT_ERR_BADSTATE,
@@ -994,6 +1090,27 @@ int fdt_size_cells(const void *fdt, int nodeoffset);
 /* Write-in-place functions                                           */
 /**********************************************************************/
 
+/**
+ * fdt_setprop_inplace_namelen_partial - change a property's value,
+ *                                       but not its size
+ * @fdt: pointer to the device tree blob
+ * @nodeoffset: offset of the node whose property to change
+ * @name: name of the property to change
+ * @namelen: number of characters of name to consider
+ * @idx: index of the property to change in the array
+ * @val: pointer to data to replace the property value with
+ * @len: length of the property value
+ *
+ * Identical to fdt_setprop_inplace(), but modifies the given property
+ * starting from the given index, and using only the first characters
+ * of the name. It is useful when you want to manipulate only one value of
+ * an array and you have a string that doesn't end with \0.
+ */
+int fdt_setprop_inplace_namelen_partial(void *fdt, int nodeoffset,
+					const char *name, int namelen,
+					uint32_t idx, const void *val,
+					int len);
+
 /**
  * fdt_setprop_inplace - change a property's value, but not its size
  * @fdt: pointer to the device tree blob
@@ -1410,6 +1527,36 @@ static inline int fdt_setprop_cell(void *fdt, int nodeoffset, const char *name,
 #define fdt_setprop_string(fdt, nodeoffset, name, str) \
 	fdt_setprop((fdt), (nodeoffset), (name), (str), strlen(str)+1)
 
+
+/**
+ * fdt_setprop_empty - set a property to an empty value
+ * @fdt: pointer to the device tree blob
+ * @nodeoffset: offset of the node whose property to change
+ * @name: name of the property to change
+ *
+ * fdt_setprop_empty() sets the value of the named property in the
+ * given node to an empty (zero length) value, or creates a new empty
+ * property if it does not already exist.
+ *
+ * This function may insert or delete data from the blob, and will
+ * therefore change the offsets of some existing nodes.
+ *
+ * returns:
+ *	0, on success
+ *	-FDT_ERR_NOSPACE, there is insufficient free space in the blob to
+ *		contain the new property value
+ *	-FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag
+ *	-FDT_ERR_BADLAYOUT,
+ *	-FDT_ERR_BADMAGIC,
+ *	-FDT_ERR_BADVERSION,
+ *	-FDT_ERR_BADSTATE,
+ *	-FDT_ERR_BADSTRUCTURE,
+ *	-FDT_ERR_BADLAYOUT,
+ *	-FDT_ERR_TRUNCATED, standard meanings
+ */
+#define fdt_setprop_empty(fdt, nodeoffset, name) \
+	fdt_setprop((fdt), (nodeoffset), (name), NULL, 0)
+
 /**
  * fdt_appendprop - append to or create a property
  * @fdt: pointer to the device tree blob
@@ -1604,9 +1751,11 @@ int fdt_add_subnode_namelen(void *fdt, int parentoffset,
  * change the offsets of some existing nodes.
 
  * returns:
- *	structure block offset of the created nodeequested subnode (>=0), on success
+ *	structure block offset of the created nodeequested subnode (>=0), on
+ *		success
  *	-FDT_ERR_NOTFOUND, if the requested subnode does not exist
- *	-FDT_ERR_BADOFFSET, if parentoffset did not point to an FDT_BEGIN_NODE tag
+ *	-FDT_ERR_BADOFFSET, if parentoffset did not point to an FDT_BEGIN_NODE
+ *		tag
  *	-FDT_ERR_EXISTS, if the node at parentoffset already has a subnode of
  *		the given name
  *	-FDT_ERR_NOSPACE, if there is insufficient free space in the
@@ -1644,6 +1793,37 @@ int fdt_add_subnode(void *fdt, int parentoffset, const char *name);
  */
 int fdt_del_node(void *fdt, int nodeoffset);
 
+/**
+ * fdt_overlay_apply - Applies a DT overlay on a base DT
+ * @fdt: pointer to the base device tree blob
+ * @fdto: pointer to the device tree overlay blob
+ *
+ * fdt_overlay_apply() will apply the given device tree overlay on the
+ * given base device tree.
+ *
+ * Expect the base device tree to be modified, even if the function
+ * returns an error.
+ *
+ * returns:
+ *	0, on success
+ *	-FDT_ERR_NOSPACE, there's not enough space in the base device tree
+ *	-FDT_ERR_NOTFOUND, the overlay points to some inexistant nodes or
+ *		properties in the base DT
+ *	-FDT_ERR_BADPHANDLE,
+ *	-FDT_ERR_BADOVERLAY,
+ *	-FDT_ERR_NOPHANDLES,
+ *	-FDT_ERR_INTERNAL,
+ *	-FDT_ERR_BADLAYOUT,
+ *	-FDT_ERR_BADMAGIC,
+ *	-FDT_ERR_BADOFFSET,
+ *	-FDT_ERR_BADPATH,
+ *	-FDT_ERR_BADVERSION,
+ *	-FDT_ERR_BADSTRUCTURE,
+ *	-FDT_ERR_BADSTATE,
+ *	-FDT_ERR_TRUNCATED, standard meanings
+ */
+int fdt_overlay_apply(void *fdt, void *fdto);
+
 /**********************************************************************/
 /* Debugging / informational functions                                */
 /**********************************************************************/
diff --git a/scripts/dtc/libfdt/libfdt_env.h b/scripts/dtc/libfdt/libfdt_env.h
index 9dea97d..952056c 100644
--- a/scripts/dtc/libfdt/libfdt_env.h
+++ b/scripts/dtc/libfdt/libfdt_env.h
@@ -54,19 +54,20 @@
 
 #include <stddef.h>
 #include <stdint.h>
+#include <stdlib.h>
 #include <string.h>
 
 #ifdef __CHECKER__
-#define __force __attribute__((force))
-#define __bitwise __attribute__((bitwise))
+#define FDT_FORCE __attribute__((force))
+#define FDT_BITWISE __attribute__((bitwise))
 #else
-#define __force
-#define __bitwise
+#define FDT_FORCE
+#define FDT_BITWISE
 #endif
 
-typedef uint16_t __bitwise fdt16_t;
-typedef uint32_t __bitwise fdt32_t;
-typedef uint64_t __bitwise fdt64_t;
+typedef uint16_t FDT_BITWISE fdt16_t;
+typedef uint32_t FDT_BITWISE fdt32_t;
+typedef uint64_t FDT_BITWISE fdt64_t;
 
 #define EXTRACT_BYTE(x, n)	((unsigned long long)((uint8_t *)&x)[n])
 #define CPU_TO_FDT16(x) ((EXTRACT_BYTE(x, 0) << 8) | EXTRACT_BYTE(x, 1))
@@ -79,29 +80,29 @@ typedef uint64_t __bitwise fdt64_t;
 
 static inline uint16_t fdt16_to_cpu(fdt16_t x)
 {
-	return (__force uint16_t)CPU_TO_FDT16(x);
+	return (FDT_FORCE uint16_t)CPU_TO_FDT16(x);
 }
 static inline fdt16_t cpu_to_fdt16(uint16_t x)
 {
-	return (__force fdt16_t)CPU_TO_FDT16(x);
+	return (FDT_FORCE fdt16_t)CPU_TO_FDT16(x);
 }
 
 static inline uint32_t fdt32_to_cpu(fdt32_t x)
 {
-	return (__force uint32_t)CPU_TO_FDT32(x);
+	return (FDT_FORCE uint32_t)CPU_TO_FDT32(x);
 }
 static inline fdt32_t cpu_to_fdt32(uint32_t x)
 {
-	return (__force fdt32_t)CPU_TO_FDT32(x);
+	return (FDT_FORCE fdt32_t)CPU_TO_FDT32(x);
 }
 
 static inline uint64_t fdt64_to_cpu(fdt64_t x)
 {
-	return (__force uint64_t)CPU_TO_FDT64(x);
+	return (FDT_FORCE uint64_t)CPU_TO_FDT64(x);
 }
 static inline fdt64_t cpu_to_fdt64(uint64_t x)
 {
-	return (__force fdt64_t)CPU_TO_FDT64(x);
+	return (FDT_FORCE fdt64_t)CPU_TO_FDT64(x);
 }
 #undef CPU_TO_FDT64
 #undef CPU_TO_FDT32
diff --git a/scripts/dtc/livetree.c b/scripts/dtc/livetree.c
index e229b84..3673de0 100644
--- a/scripts/dtc/livetree.c
+++ b/scripts/dtc/livetree.c
@@ -204,7 +204,7 @@ struct node *merge_nodes(struct node *old_node, struct node *new_node)
 			}
 		}
 
-		/* if no collision occured, add child to the old node. */
+		/* if no collision occurred, add child to the old node. */
 		if (new_child)
 			add_child(old_node, new_child);
 	}
@@ -242,7 +242,7 @@ void delete_property_by_name(struct node *node, char *name)
 	struct property *prop = node->proplist;
 
 	while (prop) {
-		if (!strcmp(prop->name, name)) {
+		if (streq(prop->name, name)) {
 			delete_property(prop);
 			return;
 		}
@@ -275,7 +275,7 @@ void delete_node_by_name(struct node *parent, char *name)
 	struct node *node = parent->children;
 
 	while (node) {
-		if (!strcmp(node->name, name)) {
+		if (streq(node->name, name)) {
 			delete_node(node);
 			return;
 		}
@@ -296,14 +296,31 @@ void delete_node(struct node *node)
 	delete_labels(&node->labels);
 }
 
+void append_to_property(struct node *node,
+				    char *name, const void *data, int len)
+{
+	struct data d;
+	struct property *p;
+
+	p = get_property(node, name);
+	if (p) {
+		d = data_append_data(p->val, data, len);
+		p->val = d;
+	} else {
+		d = data_append_data(empty_data, data, len);
+		p = build_property(name, d);
+		add_property(node, p);
+	}
+}
+
 struct reserve_info *build_reserve_entry(uint64_t address, uint64_t size)
 {
 	struct reserve_info *new = xmalloc(sizeof(*new));
 
 	memset(new, 0, sizeof(*new));
 
-	new->re.address = address;
-	new->re.size = size;
+	new->address = address;
+	new->size = size;
 
 	return new;
 }
@@ -335,17 +352,19 @@ struct reserve_info *add_reserve_entry(struct reserve_info *list,
 	return list;
 }
 
-struct boot_info *build_boot_info(struct reserve_info *reservelist,
-				  struct node *tree, uint32_t boot_cpuid_phys)
+struct dt_info *build_dt_info(unsigned int dtsflags,
+			      struct reserve_info *reservelist,
+			      struct node *tree, uint32_t boot_cpuid_phys)
 {
-	struct boot_info *bi;
+	struct dt_info *dti;
 
-	bi = xmalloc(sizeof(*bi));
-	bi->reservelist = reservelist;
-	bi->dt = tree;
-	bi->boot_cpuid_phys = boot_cpuid_phys;
+	dti = xmalloc(sizeof(*dti));
+	dti->dtsflags = dtsflags;
+	dti->reservelist = reservelist;
+	dti->dt = tree;
+	dti->boot_cpuid_phys = boot_cpuid_phys;
 
-	return bi;
+	return dti;
 }
 
 /*
@@ -374,7 +393,7 @@ struct property *get_property(struct node *node, const char *propname)
 cell_t propval_cell(struct property *prop)
 {
 	assert(prop->val.len == sizeof(cell_t));
-	return fdt32_to_cpu(*((cell_t *)prop->val.val));
+	return fdt32_to_cpu(*((fdt32_t *)prop->val.val));
 }
 
 struct property *get_property_by_label(struct node *tree, const char *label,
@@ -580,24 +599,24 @@ static int cmp_reserve_info(const void *ax, const void *bx)
 	a = *((const struct reserve_info * const *)ax);
 	b = *((const struct reserve_info * const *)bx);
 
-	if (a->re.address < b->re.address)
+	if (a->address < b->address)
 		return -1;
-	else if (a->re.address > b->re.address)
+	else if (a->address > b->address)
 		return 1;
-	else if (a->re.size < b->re.size)
+	else if (a->size < b->size)
 		return -1;
-	else if (a->re.size > b->re.size)
+	else if (a->size > b->size)
 		return 1;
 	else
 		return 0;
 }
 
-static void sort_reserve_entries(struct boot_info *bi)
+static void sort_reserve_entries(struct dt_info *dti)
 {
 	struct reserve_info *ri, **tbl;
 	int n = 0, i = 0;
 
-	for (ri = bi->reservelist;
+	for (ri = dti->reservelist;
 	     ri;
 	     ri = ri->next)
 		n++;
@@ -607,14 +626,14 @@ static void sort_reserve_entries(struct boot_info *bi)
 
 	tbl = xmalloc(n * sizeof(*tbl));
 
-	for (ri = bi->reservelist;
+	for (ri = dti->reservelist;
 	     ri;
 	     ri = ri->next)
 		tbl[i++] = ri;
 
 	qsort(tbl, n, sizeof(*tbl), cmp_reserve_info);
 
-	bi->reservelist = tbl[0];
+	dti->reservelist = tbl[0];
 	for (i = 0; i < (n-1); i++)
 		tbl[i]->next = tbl[i+1];
 	tbl[n-1]->next = NULL;
@@ -704,8 +723,258 @@ static void sort_node(struct node *node)
 		sort_node(c);
 }
 
-void sort_tree(struct boot_info *bi)
+void sort_tree(struct dt_info *dti)
+{
+	sort_reserve_entries(dti);
+	sort_node(dti->dt);
+}
+
+/* utility helper to avoid code duplication */
+static struct node *build_and_name_child_node(struct node *parent, char *name)
+{
+	struct node *node;
+
+	node = build_node(NULL, NULL);
+	name_node(node, xstrdup(name));
+	add_child(parent, node);
+
+	return node;
+}
+
+static struct node *build_root_node(struct node *dt, char *name)
+{
+	struct node *an;
+
+	an = get_subnode(dt, name);
+	if (!an)
+		an = build_and_name_child_node(dt, name);
+
+	if (!an)
+		die("Could not build root node /%s\n", name);
+
+	return an;
+}
+
+static bool any_label_tree(struct dt_info *dti, struct node *node)
+{
+	struct node *c;
+
+	if (node->labels)
+		return true;
+
+	for_each_child(node, c)
+		if (any_label_tree(dti, c))
+			return true;
+
+	return false;
+}
+
+static void generate_label_tree_internal(struct dt_info *dti,
+					 struct node *an, struct node *node,
+					 bool allocph)
 {
-	sort_reserve_entries(bi);
-	sort_node(bi->dt);
+	struct node *dt = dti->dt;
+	struct node *c;
+	struct property *p;
+	struct label *l;
+
+	/* if there are labels */
+	if (node->labels) {
+
+		/* now add the label in the node */
+		for_each_label(node->labels, l) {
+
+			/* check whether the label already exists */
+			p = get_property(an, l->label);
+			if (p) {
+				fprintf(stderr, "WARNING: label %s already"
+					" exists in /%s", l->label,
+					an->name);
+				continue;
+			}
+
+			/* insert it */
+			p = build_property(l->label,
+				data_copy_mem(node->fullpath,
+						strlen(node->fullpath) + 1));
+			add_property(an, p);
+		}
+
+		/* force allocation of a phandle for this node */
+		if (allocph)
+			(void)get_node_phandle(dt, node);
+	}
+
+	for_each_child(node, c)
+		generate_label_tree_internal(dti, an, c, allocph);
+}
+
+static bool any_fixup_tree(struct dt_info *dti, struct node *node)
+{
+	struct node *c;
+	struct property *prop;
+	struct marker *m;
+
+	for_each_property(node, prop) {
+		m = prop->val.markers;
+		for_each_marker_of_type(m, REF_PHANDLE) {
+			if (!get_node_by_ref(dti->dt, m->ref))
+				return true;
+		}
+	}
+
+	for_each_child(node, c) {
+		if (any_fixup_tree(dti, c))
+			return true;
+	}
+
+	return false;
+}
+
+static void add_fixup_entry(struct dt_info *dti, struct node *fn,
+			    struct node *node, struct property *prop,
+			    struct marker *m)
+{
+	char *entry;
+
+	/* m->ref can only be a REF_PHANDLE, but check anyway */
+	assert(m->type == REF_PHANDLE);
+
+	/* there shouldn't be any ':' in the arguments */
+	if (strchr(node->fullpath, ':') || strchr(prop->name, ':'))
+		die("arguments should not contain ':'\n");
+
+	xasprintf(&entry, "%s:%s:%u",
+			node->fullpath, prop->name, m->offset);
+	append_to_property(fn, m->ref, entry, strlen(entry) + 1);
+
+	free(entry);
+}
+
+static void generate_fixups_tree_internal(struct dt_info *dti,
+					  struct node *fn,
+					  struct node *node)
+{
+	struct node *dt = dti->dt;
+	struct node *c;
+	struct property *prop;
+	struct marker *m;
+	struct node *refnode;
+
+	for_each_property(node, prop) {
+		m = prop->val.markers;
+		for_each_marker_of_type(m, REF_PHANDLE) {
+			refnode = get_node_by_ref(dt, m->ref);
+			if (!refnode)
+				add_fixup_entry(dti, fn, node, prop, m);
+		}
+	}
+
+	for_each_child(node, c)
+		generate_fixups_tree_internal(dti, fn, c);
+}
+
+static bool any_local_fixup_tree(struct dt_info *dti, struct node *node)
+{
+	struct node *c;
+	struct property *prop;
+	struct marker *m;
+
+	for_each_property(node, prop) {
+		m = prop->val.markers;
+		for_each_marker_of_type(m, REF_PHANDLE) {
+			if (get_node_by_ref(dti->dt, m->ref))
+				return true;
+		}
+	}
+
+	for_each_child(node, c) {
+		if (any_local_fixup_tree(dti, c))
+			return true;
+	}
+
+	return false;
+}
+
+static void add_local_fixup_entry(struct dt_info *dti,
+		struct node *lfn, struct node *node,
+		struct property *prop, struct marker *m,
+		struct node *refnode)
+{
+	struct node *wn, *nwn;	/* local fixup node, walk node, new */
+	fdt32_t value_32;
+	char **compp;
+	int i, depth;
+
+	/* walk back retreiving depth */
+	depth = 0;
+	for (wn = node; wn; wn = wn->parent)
+		depth++;
+
+	/* allocate name array */
+	compp = xmalloc(sizeof(*compp) * depth);
+
+	/* store names in the array */
+	for (wn = node, i = depth - 1; wn; wn = wn->parent, i--)
+		compp[i] = wn->name;
+
+	/* walk the path components creating nodes if they don't exist */
+	for (wn = lfn, i = 1; i < depth; i++, wn = nwn) {
+		/* if no node exists, create it */
+		nwn = get_subnode(wn, compp[i]);
+		if (!nwn)
+			nwn = build_and_name_child_node(wn, compp[i]);
+	}
+
+	free(compp);
+
+	value_32 = cpu_to_fdt32(m->offset);
+	append_to_property(wn, prop->name, &value_32, sizeof(value_32));
+}
+
+static void generate_local_fixups_tree_internal(struct dt_info *dti,
+						struct node *lfn,
+						struct node *node)
+{
+	struct node *dt = dti->dt;
+	struct node *c;
+	struct property *prop;
+	struct marker *m;
+	struct node *refnode;
+
+	for_each_property(node, prop) {
+		m = prop->val.markers;
+		for_each_marker_of_type(m, REF_PHANDLE) {
+			refnode = get_node_by_ref(dt, m->ref);
+			if (refnode)
+				add_local_fixup_entry(dti, lfn, node, prop, m, refnode);
+		}
+	}
+
+	for_each_child(node, c)
+		generate_local_fixups_tree_internal(dti, lfn, c);
+}
+
+void generate_label_tree(struct dt_info *dti, char *name, bool allocph)
+{
+	if (!any_label_tree(dti, dti->dt))
+		return;
+	generate_label_tree_internal(dti, build_root_node(dti->dt, name),
+				     dti->dt, allocph);
+}
+
+void generate_fixups_tree(struct dt_info *dti, char *name)
+{
+	if (!any_fixup_tree(dti, dti->dt))
+		return;
+	generate_fixups_tree_internal(dti, build_root_node(dti->dt, name),
+				      dti->dt);
+}
+
+void generate_local_fixups_tree(struct dt_info *dti, char *name)
+{
+	if (!any_local_fixup_tree(dti, dti->dt))
+		return;
+	generate_local_fixups_tree_internal(dti, build_root_node(dti->dt, name),
+					    dti->dt);
 }
diff --git a/scripts/dtc/srcpos.c b/scripts/dtc/srcpos.c
index f534c22..9d38459 100644
--- a/scripts/dtc/srcpos.c
+++ b/scripts/dtc/srcpos.c
@@ -246,46 +246,27 @@ srcpos_copy(struct srcpos *pos)
 	return pos_new;
 }
 
-
-
-void
-srcpos_dump(struct srcpos *pos)
-{
-	printf("file        : \"%s\"\n",
-	       pos->file ? (char *) pos->file : "<no file>");
-	printf("first_line  : %d\n", pos->first_line);
-	printf("first_column: %d\n", pos->first_column);
-	printf("last_line   : %d\n", pos->last_line);
-	printf("last_column : %d\n", pos->last_column);
-	printf("file        : %s\n", pos->file->name);
-}
-
-
 char *
 srcpos_string(struct srcpos *pos)
 {
 	const char *fname = "<no-file>";
 	char *pos_str;
-	int rc;
 
-	if (pos)
+	if (pos->file && pos->file->name)
 		fname = pos->file->name;
 
 
 	if (pos->first_line != pos->last_line)
-		rc = asprintf(&pos_str, "%s:%d.%d-%d.%d", fname,
-			      pos->first_line, pos->first_column,
-			      pos->last_line, pos->last_column);
+		xasprintf(&pos_str, "%s:%d.%d-%d.%d", fname,
+			  pos->first_line, pos->first_column,
+			  pos->last_line, pos->last_column);
 	else if (pos->first_column != pos->last_column)
-		rc = asprintf(&pos_str, "%s:%d.%d-%d", fname,
-			      pos->first_line, pos->first_column,
-			      pos->last_column);
+		xasprintf(&pos_str, "%s:%d.%d-%d", fname,
+			  pos->first_line, pos->first_column,
+			  pos->last_column);
 	else
-		rc = asprintf(&pos_str, "%s:%d.%d", fname,
-			      pos->first_line, pos->first_column);
-
-	if (rc == -1)
-		die("Couldn't allocate in srcpos string");
+		xasprintf(&pos_str, "%s:%d.%d", fname,
+			  pos->first_line, pos->first_column);
 
 	return pos_str;
 }
diff --git a/scripts/dtc/srcpos.h b/scripts/dtc/srcpos.h
index f81827b..7caca82 100644
--- a/scripts/dtc/srcpos.h
+++ b/scripts/dtc/srcpos.h
@@ -22,6 +22,7 @@
 
 #include <stdio.h>
 #include <stdbool.h>
+#include "util.h"
 
 struct srcfile_state {
 	FILE *f;
@@ -105,14 +106,11 @@ extern struct srcpos srcpos_empty;
 extern void srcpos_update(struct srcpos *pos, const char *text, int len);
 extern struct srcpos *srcpos_copy(struct srcpos *pos);
 extern char *srcpos_string(struct srcpos *pos);
-extern void srcpos_dump(struct srcpos *pos);
-
-extern void srcpos_verror(struct srcpos *pos, const char *prefix,
-			  const char *fmt, va_list va)
-	__attribute__((format(printf, 3, 0)));
-extern void srcpos_error(struct srcpos *pos, const char *prefix,
-			 const char *fmt, ...)
-	__attribute__((format(printf, 3, 4)));
+
+extern void PRINTF(3, 0) srcpos_verror(struct srcpos *pos, const char *prefix,
+					const char *fmt, va_list va);
+extern void PRINTF(3, 4) srcpos_error(struct srcpos *pos, const char *prefix,
+				      const char *fmt, ...);
 
 extern void srcpos_set_line(char *f, int l);
 
diff --git a/scripts/dtc/treesource.c b/scripts/dtc/treesource.c
index a55d1d1..2461a3d 100644
--- a/scripts/dtc/treesource.c
+++ b/scripts/dtc/treesource.c
@@ -25,12 +25,12 @@ extern FILE *yyin;
 extern int yyparse(void);
 extern YYLTYPE yylloc;
 
-struct boot_info *the_boot_info;
+struct dt_info *parser_output;
 bool treesource_error;
 
-struct boot_info *dt_from_source(const char *fname)
+struct dt_info *dt_from_source(const char *fname)
 {
-	the_boot_info = NULL;
+	parser_output = NULL;
 	treesource_error = false;
 
 	srcfile_push(fname);
@@ -43,7 +43,7 @@ struct boot_info *dt_from_source(const char *fname)
 	if (treesource_error)
 		die("Syntax error parsing input tree\n");
 
-	return the_boot_info;
+	return parser_output;
 }
 
 static void write_prefix(FILE *f, int level)
@@ -137,7 +137,7 @@ static void write_propval_string(FILE *f, struct data val)
 static void write_propval_cells(FILE *f, struct data val)
 {
 	void *propend = val.val + val.len;
-	cell_t *cp = (cell_t *)val.val;
+	fdt32_t *cp = (fdt32_t *)val.val;
 	struct marker *m = val.markers;
 
 	fprintf(f, "<");
@@ -263,22 +263,22 @@ static void write_tree_source_node(FILE *f, struct node *tree, int level)
 }
 
 
-void dt_to_source(FILE *f, struct boot_info *bi)
+void dt_to_source(FILE *f, struct dt_info *dti)
 {
 	struct reserve_info *re;
 
 	fprintf(f, "/dts-v1/;\n\n");
 
-	for (re = bi->reservelist; re; re = re->next) {
+	for (re = dti->reservelist; re; re = re->next) {
 		struct label *l;
 
 		for_each_label(re->labels, l)
 			fprintf(f, "%s: ", l->label);
 		fprintf(f, "/memreserve/\t0x%016llx 0x%016llx;\n",
-			(unsigned long long)re->re.address,
-			(unsigned long long)re->re.size);
+			(unsigned long long)re->address,
+			(unsigned long long)re->size);
 	}
 
-	write_tree_source_node(f, bi->dt, 0);
+	write_tree_source_node(f, dti->dt, 0);
 }
 
diff --git a/scripts/dtc/util.c b/scripts/dtc/util.c
index fb124ee..9953c32 100644
--- a/scripts/dtc/util.c
+++ b/scripts/dtc/util.c
@@ -46,6 +46,36 @@ char *xstrdup(const char *s)
 	return d;
 }
 
+/* based in part from (3) vsnprintf */
+int xasprintf(char **strp, const char *fmt, ...)
+{
+	int n, size = 128;	/* start with 128 bytes */
+	char *p;
+	va_list ap;
+
+	/* initial pointer is NULL making the fist realloc to be malloc */
+	p = NULL;
+	while (1) {
+		p = xrealloc(p, size);
+
+		/* Try to print in the allocated space. */
+		va_start(ap, fmt);
+		n = vsnprintf(p, size, fmt, ap);
+		va_end(ap);
+
+		/* If that worked, return the string. */
+		if (n > -1 && n < size)
+			break;
+		/* Else try again with more space. */
+		if (n > -1)	/* glibc 2.1 */
+			size = n + 1; /* precisely what is needed */
+		else		/* glibc 2.0 */
+			size *= 2; /* twice the old size */
+	}
+	*strp = p;
+	return strlen(p);
+}
+
 char *join_path(const char *path, const char *name)
 {
 	int lenp = strlen(path);
@@ -366,7 +396,7 @@ void utilfdt_print_data(const char *data, int len)
 		} while (s < data + len);
 
 	} else if ((len % 4) == 0) {
-		const uint32_t *cell = (const uint32_t *)data;
+		const fdt32_t *cell = (const fdt32_t *)data;
 
 		printf(" = <");
 		for (i = 0, len /= 4; i < len; i++)
@@ -382,15 +412,16 @@ void utilfdt_print_data(const char *data, int len)
 	}
 }
 
-void util_version(void)
+void NORETURN util_version(void)
 {
 	printf("Version: %s\n", DTC_VERSION);
 	exit(0);
 }
 
-void util_usage(const char *errmsg, const char *synopsis,
-		const char *short_opts, struct option const long_opts[],
-		const char * const opts_help[])
+void NORETURN util_usage(const char *errmsg, const char *synopsis,
+			 const char *short_opts,
+			 struct option const long_opts[],
+			 const char * const opts_help[])
 {
 	FILE *fp = errmsg ? stderr : stdout;
 	const char a_arg[] = "<arg>";
diff --git a/scripts/dtc/util.h b/scripts/dtc/util.h
index f800b60..ad5f411 100644
--- a/scripts/dtc/util.h
+++ b/scripts/dtc/util.h
@@ -25,9 +25,17 @@
  *                                                                   USA
  */
 
+#ifdef __GNUC__
+#define PRINTF(i, j)	__attribute__((format (printf, i, j)))
+#define NORETURN	__attribute__((noreturn))
+#else
+#define PRINTF(i, j)
+#define NORETURN
+#endif
+
 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
 
-static inline void __attribute__((noreturn)) die(const char *str, ...)
+static inline void NORETURN PRINTF(1, 2) die(const char *str, ...)
 {
 	va_list ap;
 
@@ -53,12 +61,14 @@ static inline void *xrealloc(void *p, size_t len)
 	void *new = realloc(p, len);
 
 	if (!new)
-		die("realloc() failed (len=%d)\n", len);
+		die("realloc() failed (len=%zd)\n", len);
 
 	return new;
 }
 
 extern char *xstrdup(const char *s);
+
+extern int PRINTF(2, 3) xasprintf(char **strp, const char *fmt, ...);
 extern char *join_path(const char *path, const char *name);
 
 /**
@@ -187,7 +197,7 @@ void utilfdt_print_data(const char *data, int len);
 /**
  * Show source version and exit
  */
-void util_version(void) __attribute__((noreturn));
+void NORETURN util_version(void);
 
 /**
  * Show usage and exit
@@ -201,9 +211,10 @@ void util_version(void) __attribute__((noreturn));
  * @param long_opts	The structure of long options
  * @param opts_help	An array of help strings (should align with long_opts)
  */
-void util_usage(const char *errmsg, const char *synopsis,
-		const char *short_opts, struct option const long_opts[],
-		const char * const opts_help[]) __attribute__((noreturn));
+void NORETURN util_usage(const char *errmsg, const char *synopsis,
+			 const char *short_opts,
+			 struct option const long_opts[],
+			 const char * const opts_help[]);
 
 /**
  * Show usage and exit
diff --git a/scripts/dtc/version_gen.h b/scripts/dtc/version_gen.h
index ad9b05a..859564e 100644
--- a/scripts/dtc/version_gen.h
+++ b/scripts/dtc/version_gen.h
@@ -1 +1 @@
-#define DTC_VERSION "DTC 1.4.1-g53bf130b"
+#define DTC_VERSION "DTC 1.4.4"
diff --git a/scripts/package/builddeb b/scripts/package/builddeb
index 3c575cd..172be74 100755
--- a/scripts/package/builddeb
+++ b/scripts/package/builddeb
@@ -69,7 +69,7 @@ set_debarch() {
 		echo "" >&2
 		echo "** ** **  WARNING  ** ** **" >&2
 		echo "" >&2
-		echo "Your architecture doesn't have it's equivalent" >&2
+		echo "Your architecture doesn't have its equivalent" >&2
 		echo "Debian userspace architecture defined!" >&2
 		echo "Falling back to using your current userspace instead!" >&2
 		echo "Please add support for $UTS_MACHINE to ${0} ..." >&2
@@ -151,9 +151,18 @@ else
 fi
 
 if grep -q "^CONFIG_OF=y" $KCONFIG_CONFIG ; then
+	mkdir -p "$tmpdir/boot/dtbs/$version"
 	# Only some architectures with OF support have this target
 	if grep -q dtbs_install "${srctree}/arch/$SRCARCH/Makefile"; then
-		$MAKE KBUILD_SRC= INSTALL_DTBS_PATH="$tmpdir/usr/lib/$packagename" dtbs_install
+		$MAKE KBUILD_SRC= INSTALL_DTBS_PATH="$tmpdir/boot/dtbs/$version" dtbs_install
+	else
+		$MAKE KBUILD_SRC= dtbs
+		find arch/arm/boot/ -iname "*.dtb" -exec cp -v '{}' "$tmpdir/boot/dtbs/$version" \;
+	fi
+
+	#make dtbs_install seems to add an .old directory
+	if [ -d "$tmpdir/boot/dtbs/$version.old" ] ; then
+		rm -rf "$tmpdir/boot/dtbs/$version.old"
 	fi
 fi
 
@@ -262,10 +271,10 @@ EOF
 cat <<EOF > debian/copyright
 This is a packacked upstream version of the Linux kernel.
 
-The sources may be found at most Linux ftp sites, including:
-ftp://ftp.kernel.org/pub/linux/kernel
+The sources may be found at most Linux archive sites, including:
+https://www.kernel.org/pub/linux/kernel
 
-Copyright: 1991 - 2015 Linus Torvalds and others.
+Copyright: 1991 - 2017 Linus Torvalds and others.
 
 The git repository for mainline kernel development is at:
 git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
@@ -288,7 +297,6 @@ Section: kernel
 Priority: optional
 Maintainer: $maintainer
 Build-Depends: $build_depends
-Standards-Version: 3.8.4
 Homepage: http://www.kernel.org/
 EOF
 
@@ -296,7 +304,6 @@ if [ "$ARCH" = "um" ]; then
 	cat <<EOF >> debian/control
 
 Package: $packagename
-Provides: linux-image, linux-image-2.6, linux-modules-$version
 Architecture: any
 Description: User Mode Linux kernel, version $version
  User-mode Linux is a port of the Linux kernel to its own system call
@@ -313,7 +320,6 @@ else
 	cat <<EOF >> debian/control
 
 Package: $packagename
-Provides: linux-image, linux-image-2.6, linux-modules-$version
 Suggests: $fwpackagename
 Architecture: any
 Description: Linux kernel, version $version
@@ -346,7 +352,6 @@ rm -f "$objtree/debian/hdrsrcfiles" "$objtree/debian/hdrobjfiles"
 cat <<EOF >> debian/control
 
 Package: $kernel_headers_packagename
-Provides: linux-headers, linux-headers-2.6
 Architecture: any
 Description: Linux kernel headers for $KERNELRELEASE on \${kernel:debarch}
  This package provides kernel header files for $KERNELRELEASE on \${kernel:debarch}
@@ -404,7 +409,6 @@ if [ -n "$BUILD_DEBUG" ] ; then
 
 Package: $dbg_packagename
 Section: debug
-Provides: linux-debug, linux-debug-$version
 Architecture: any
 Description: Linux kernel debugging symbols for $version
  This package will come in handy if you need to debug the kernel. It provides
diff --git a/sound/soc/codecs/Kconfig b/sound/soc/codecs/Kconfig
index c67667b..0f40b09 100644
--- a/sound/soc/codecs/Kconfig
+++ b/sound/soc/codecs/Kconfig
@@ -135,6 +135,7 @@ config SND_SOC_ALL_CODECS
 	select SND_SOC_TAS5086 if I2C
 	select SND_SOC_TAS571X if I2C
 	select SND_SOC_TAS5720 if I2C
+	select SND_SOC_TDM
 	select SND_SOC_TFA9879 if I2C
 	select SND_SOC_TLV320AIC23_I2C if I2C
 	select SND_SOC_TLV320AIC23_SPI if SPI_MASTER
@@ -812,6 +813,9 @@ config SND_SOC_TAS5720
 	  Enable support for Texas Instruments TAS5720L/M high-efficiency mono
 	  Class-D audio power amplifiers.
 
+config SND_SOC_TDM
+	tristate "Generic TDM codec"
+
 config SND_SOC_TFA9879
 	tristate "NXP Semiconductors TFA9879 amplifier"
 	depends on I2C
diff --git a/sound/soc/codecs/Makefile b/sound/soc/codecs/Makefile
index 958cd49..ea82c79 100644
--- a/sound/soc/codecs/Makefile
+++ b/sound/soc/codecs/Makefile
@@ -142,6 +142,7 @@ snd-soc-sti-sas-objs := sti-sas.o
 snd-soc-tas5086-objs := tas5086.o
 snd-soc-tas571x-objs := tas571x.o
 snd-soc-tas5720-objs := tas5720.o
+snd-soc-tdm-objs := tdm.o
 snd-soc-tfa9879-objs := tfa9879.o
 snd-soc-tlv320aic23-objs := tlv320aic23.o
 snd-soc-tlv320aic23-i2c-objs := tlv320aic23-i2c.o
@@ -363,6 +364,7 @@ obj-$(CONFIG_SND_SOC_TAS2552)	+= snd-soc-tas2552.o
 obj-$(CONFIG_SND_SOC_TAS5086)	+= snd-soc-tas5086.o
 obj-$(CONFIG_SND_SOC_TAS571X)	+= snd-soc-tas571x.o
 obj-$(CONFIG_SND_SOC_TAS5720)	+= snd-soc-tas5720.o
+obj-$(CONFIG_SND_SOC_TDM)	+= snd-soc-tdm.o
 obj-$(CONFIG_SND_SOC_TFA9879)	+= snd-soc-tfa9879.o
 obj-$(CONFIG_SND_SOC_TLV320AIC23)	+= snd-soc-tlv320aic23.o
 obj-$(CONFIG_SND_SOC_TLV320AIC23_I2C)	+= snd-soc-tlv320aic23-i2c.o
diff --git a/sound/soc/codecs/spdif_transmitter.c b/sound/soc/codecs/spdif_transmitter.c
index ee36753..b682f12 100644
--- a/sound/soc/codecs/spdif_transmitter.c
+++ b/sound/soc/codecs/spdif_transmitter.c
@@ -27,7 +27,9 @@
 #define STUB_RATES	SNDRV_PCM_RATE_8000_192000
 #define STUB_FORMATS	(SNDRV_PCM_FMTBIT_S16_LE | \
 			SNDRV_PCM_FMTBIT_S20_3LE | \
-			SNDRV_PCM_FMTBIT_S24_LE)
+			SNDRV_PCM_FMTBIT_S24_3LE | \
+			SNDRV_PCM_FMTBIT_S24_LE | \
+			SNDRV_PCM_FMTBIT_S32_LE)
 
 static const struct snd_soc_dapm_widget dit_widgets[] = {
 	SND_SOC_DAPM_OUTPUT("spdif-out"),
diff --git b/sound/soc/codecs/tdm.c b/sound/soc/codecs/tdm.c
new file mode 100644
index 0000000..b8fb3b8
--- /dev/null
+++ b/sound/soc/codecs/tdm.c
@@ -0,0 +1,112 @@
+/*
+ * ALSA SoC generic TDM codec driver
+ *
+ * Author:      Matthijs van Duin <matthijsvanduin@gmail.com>
+ * Copyright:   (C) 2016  Dutch & Dutch
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * TODO Allow customization via device tree.
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/slab.h>
+#include <sound/soc.h>
+#include <sound/pcm.h>
+#include <sound/initval.h>
+#include <linux/of.h>
+
+#define DRV_NAME "tdm-audio"
+
+/* As far as I can tell the LE/3LE/BE/3BE suffix merely indicates how the data
+ * was represented in memory, so why would the codec care?  On the other hand,
+ * how do you indicate the bit-endianness on wire?  */
+
+#define TDM_FORMATS	(SNDRV_PCM_FMTBIT_S8 | \
+			SNDRV_PCM_FMTBIT_U8 | \
+			SNDRV_PCM_FMTBIT_S16_LE | \
+			SNDRV_PCM_FMTBIT_U16_LE | \
+			SNDRV_PCM_FMTBIT_S20_3LE | \
+			SNDRV_PCM_FMTBIT_U20_3LE | \
+			SNDRV_PCM_FMTBIT_S24_3LE | \
+			SNDRV_PCM_FMTBIT_U24_3LE | \
+			SNDRV_PCM_FMTBIT_S24_LE | \
+			SNDRV_PCM_FMTBIT_U24_LE | \
+			SNDRV_PCM_FMTBIT_S32_LE | \
+			SNDRV_PCM_FMTBIT_U32_LE)
+
+static const struct snd_soc_dapm_widget tdm_audio_widgets[] = {
+	SND_SOC_DAPM_OUTPUT("Sink"),
+	SND_SOC_DAPM_INPUT("Source"),
+};
+
+static const struct snd_soc_dapm_route tdm_audio_routes[] = {
+	{ "Sink", NULL, "Playback" },
+	{ "Capture", NULL, "Source" },
+};
+
+static struct snd_soc_codec_driver soc_codec_tdm_audio = {
+	.component_driver = {
+		.dapm_widgets		= tdm_audio_widgets,
+		.num_dapm_widgets	= ARRAY_SIZE(tdm_audio_widgets),
+		.dapm_routes		= tdm_audio_routes,
+		.num_dapm_routes	= ARRAY_SIZE(tdm_audio_routes),
+	},
+};
+
+static struct snd_soc_dai_driver tdm_audio_dai = {
+	.name		= "tdm_audio",
+	.playback 	= {
+		.stream_name	= "Playback",
+		.channels_min	= 1,
+		.channels_max	= 16,
+		.rates		= SNDRV_PCM_RATE_CONTINUOUS,
+		.formats	= TDM_FORMATS,
+	},
+	.capture 	= {
+		.stream_name	= "Capture",
+		.channels_min	= 1,
+		.channels_max	= 16,
+		.rates		= SNDRV_PCM_RATE_CONTINUOUS,
+		.formats	= TDM_FORMATS,
+	},
+};
+
+static int tdm_audio_probe(struct platform_device *pdev)
+{
+	return snd_soc_register_codec(&pdev->dev, &soc_codec_tdm_audio,
+			&tdm_audio_dai, 1);
+}
+
+static int tdm_audio_remove(struct platform_device *pdev)
+{
+	snd_soc_unregister_codec(&pdev->dev);
+	return 0;
+}
+
+#ifdef CONFIG_OF
+static const struct of_device_id tdm_audio_dt_ids[] = {
+	{ .compatible = "linux,tdm-audio", },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, tdm_audio_dt_ids);
+#endif
+
+static struct platform_driver tdm_audio_driver = {
+	.probe		= tdm_audio_probe,
+	.remove		= tdm_audio_remove,
+	.driver		= {
+		.name	= DRV_NAME,
+		.of_match_table = of_match_ptr(tdm_audio_dt_ids),
+	},
+};
+
+module_platform_driver(tdm_audio_driver);
+
+MODULE_AUTHOR("Matthijs van Duin <matthijs@dutchdutch.com>");
+MODULE_DESCRIPTION("Generic TDM codec driver");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:" DRV_NAME);
diff --git a/sound/soc/davinci/davinci-mcasp.c b/sound/soc/davinci/davinci-mcasp.c
index 3c5a980..35e441c 100644
--- a/sound/soc/davinci/davinci-mcasp.c
+++ b/sound/soc/davinci/davinci-mcasp.c
@@ -151,6 +151,8 @@ static void mcasp_set_ctl_reg(struct davinci_mcasp *mcasp, u32 ctl_reg, u32 val)
 	mcasp_set_bits(mcasp, ctl_reg, val);
 
 	/* programming GBLCTL needs to read back from GBLCTL and verfiy */
+	ctl_reg = DAVINCI_MCASP_GBLCTL_REG;
+
 	/* loop count is to avoid the lock-up */
 	for (i = 0; i < 1000; i++) {
 		if ((mcasp_get_reg(mcasp, ctl_reg) & val) == val)
@@ -572,6 +574,12 @@ static int __davinci_mcasp_set_clkdiv(struct davinci_mcasp *mcasp, int div_id,
 		 * tdm_slot width by dividing the the ratio by the
 		 * number of configured tdm slots.
 		 */
+		if (mcasp->op_mode == DAVINCI_MCASP_DIT_MODE) {
+			if (div != 128)
+				dev_warn(mcasp->dev,
+					"%s(): BCLK/LRCLK %d requested, must be 128 for DIT mode", __func__, div);
+			break;
+		}
 		mcasp->slot_width = div / mcasp->tdm_slots;
 		if (div % mcasp->tdm_slots)
 			dev_warn(mcasp->dev,
@@ -696,56 +704,59 @@ static int davinci_mcasp_set_tdm_slot(struct snd_soc_dai *dai,
 }
 
 static int davinci_config_channel_size(struct davinci_mcasp *mcasp,
-				       int sample_width)
+				       u32 sample_width)
 {
-	u32 fmt;
-	u32 tx_rotate = (sample_width / 4) & 0x7;
-	u32 mask = (1ULL << sample_width) - 1;
-	u32 slot_width = sample_width;
+	u32 mask, tx_rotate, rx_rotate;
+	u32 slot_width, fmt;
 
 	/*
-	 * For captured data we should not rotate, inversion and masking is
-	 * enoguh to get the data to the right position:
-	 * Format	  data from bus		after reverse (XRBUF)
-	 * S16_LE:	|LSB|MSB|xxx|xxx|	|xxx|xxx|MSB|LSB|
-	 * S24_3LE:	|LSB|DAT|MSB|xxx|	|xxx|MSB|DAT|LSB|
-	 * S24_LE:	|LSB|DAT|MSB|xxx|	|xxx|MSB|DAT|LSB|
-	 * S32_LE:	|LSB|DAT|DAT|MSB|	|MSB|DAT|DAT|LSB|
+	 * Sample data is always right-justified.  Apply mask and rotate right
+	 * to left-justified.  For receive the steps are in reverse order (but
+	 * still rotates right).
 	 */
-	u32 rx_rotate = 0;
+	mask = GENMASK(sample_width, 0);
+	mcasp_set_reg(mcasp, DAVINCI_MCASP_TXMASK_REG, mask);
+	mcasp_set_reg(mcasp, DAVINCI_MCASP_RXMASK_REG, mask);
+
+	tx_rotate = sample_width;
+	rx_rotate = -sample_width;
 
 	/*
-	 * Setting the tdm slot width either with set_clkdiv() or
-	 * set_tdm_slot() allows us to for example send 32 bits per
-	 * channel to the codec, while only 16 of them carry audio
-	 * payload.
+	 * For big-endian formats (everything except DIT), McASP needs the slot
+	 * data to be left-aligned for transmit, whereas received slot data is
+	 * delivered right-aligned:
+	 *
+	 *   +-----------------------------------------------+---------------+
+	 * <--shift-out-         slot data                   |               |
+	 *   +-----------------------------------------------+---------------+
+	 *
+	 *   +---------------+-----------------------------------------------+
+	 *   |               |                   slot data        <--shift-in--
+	 *   +---------------+-----------------------------------------------+
+	 *
+	 * For little-endian formats (DIT only) the reverse is true.
 	 */
-	if (mcasp->slot_width) {
-		/*
-		 * When we have more bclk then it is needed for the
-		 * data, we need to use the rotation to move the
-		 * received samples to have correct alignment.
-		 */
-		slot_width = mcasp->slot_width;
-		rx_rotate = (slot_width - sample_width) / 4;
+	if (mcasp->op_mode == DAVINCI_MCASP_DIT_MODE) {
+		tx_rotate -= 24;
+		slot_width = 32;
+		fmt = 0;  /* little endian */
+	} else {
+		slot_width = mcasp->slot_width ?: sample_width;
+		rx_rotate += slot_width;
+		fmt = TXORD;  /* big endian */
 	}
 
 	/* mapping of the XSSZ bit-field as described in the datasheet */
-	fmt = (slot_width >> 1) - 1;
+	fmt |= TXSSZ((slot_width >> 1) - 1);
 
-	if (mcasp->op_mode != DAVINCI_MCASP_DIT_MODE) {
-		mcasp_mod_bits(mcasp, DAVINCI_MCASP_RXFMT_REG, RXSSZ(fmt),
-			       RXSSZ(0x0F));
-		mcasp_mod_bits(mcasp, DAVINCI_MCASP_TXFMT_REG, TXSSZ(fmt),
-			       TXSSZ(0x0F));
-		mcasp_mod_bits(mcasp, DAVINCI_MCASP_TXFMT_REG, TXROT(tx_rotate),
-			       TXROT(7));
-		mcasp_mod_bits(mcasp, DAVINCI_MCASP_RXFMT_REG, RXROT(rx_rotate),
-			       RXROT(7));
-		mcasp_set_reg(mcasp, DAVINCI_MCASP_RXMASK_REG, mask);
-	}
+	tx_rotate = TXROT((tx_rotate & 31) >> 2);
+	rx_rotate = TXROT((rx_rotate & 31) >> 2);
 
-	mcasp_set_reg(mcasp, DAVINCI_MCASP_TXMASK_REG, mask);
+	if (!mcasp->dat_port)
+		fmt |= TXSEL;
+
+	mcasp_mod_bits(mcasp, DAVINCI_MCASP_TXFMT_REG, fmt | tx_rotate, 0xffff);
+	mcasp_mod_bits(mcasp, DAVINCI_MCASP_RXFMT_REG, fmt | rx_rotate, 0xffff);
 
 	return 0;
 }
@@ -866,7 +877,6 @@ static int mcasp_i2s_hw_param(struct davinci_mcasp *mcasp, int stream,
 	int total_slots;
 	int active_serializers;
 	u32 mask = 0;
-	u32 busel = 0;
 
 	total_slots = mcasp->tdm_slots;
 
@@ -902,17 +912,12 @@ static int mcasp_i2s_hw_param(struct davinci_mcasp *mcasp, int stream,
 	}
 	mcasp_clr_bits(mcasp, DAVINCI_MCASP_ACLKXCTL_REG, TX_ASYNC);
 
-	if (!mcasp->dat_port)
-		busel = TXSEL;
-
 	if (stream == SNDRV_PCM_STREAM_PLAYBACK) {
 		mcasp_set_reg(mcasp, DAVINCI_MCASP_TXTDM_REG, mask);
-		mcasp_set_bits(mcasp, DAVINCI_MCASP_TXFMT_REG, busel | TXORD);
 		mcasp_mod_bits(mcasp, DAVINCI_MCASP_TXFMCTL_REG,
 			       FSXMOD(total_slots), FSXMOD(0x1FF));
 	} else if (stream == SNDRV_PCM_STREAM_CAPTURE) {
 		mcasp_set_reg(mcasp, DAVINCI_MCASP_RXTDM_REG, mask);
-		mcasp_set_bits(mcasp, DAVINCI_MCASP_RXFMT_REG, busel | RXORD);
 		mcasp_mod_bits(mcasp, DAVINCI_MCASP_RXFMCTL_REG,
 			       FSRMOD(total_slots), FSRMOD(0x1FF));
 		/*
@@ -935,23 +940,13 @@ static int mcasp_dit_hw_param(struct davinci_mcasp *mcasp,
 	u32 cs_value = 0;
 	u8 *cs_bytes = (u8*) &cs_value;
 
-	/* Set the TX format : 24 bit right rotation, 32 bit slot, Pad 0
-	   and LSB first */
-	mcasp_set_bits(mcasp, DAVINCI_MCASP_TXFMT_REG, TXROT(6) | TXSSZ(15));
-
 	/* Set TX frame synch : DIT Mode, 1 bit width, internal, rising edge */
 	mcasp_set_reg(mcasp, DAVINCI_MCASP_TXFMCTL_REG, AFSXE | FSXMOD(0x180));
 
 	/* Set the TX tdm : for all the slots */
 	mcasp_set_reg(mcasp, DAVINCI_MCASP_TXTDM_REG, 0xFFFFFFFF);
 
-	/* Set the TX clock controls : div = 1 and internal */
-	mcasp_set_bits(mcasp, DAVINCI_MCASP_ACLKXCTL_REG, ACLKXE | TX_ASYNC);
-
-	mcasp_clr_bits(mcasp, DAVINCI_MCASP_XEVTCTL_REG, TXDATADMADIS);
-
-	/* Only 44100 and 48000 are valid, both have the same setting */
-	mcasp_set_bits(mcasp, DAVINCI_MCASP_AHCLKXCTL_REG, AHCLKXDIV(3));
+	mcasp_set_bits(mcasp, DAVINCI_MCASP_ACLKXCTL_REG, TX_ASYNC);
 
 	/* Enable the DIT */
 	mcasp_set_bits(mcasp, DAVINCI_MCASP_TXDITCTL_REG, DITEN);
@@ -1049,6 +1044,15 @@ static int davinci_mcasp_calc_clk_div(struct davinci_mcasp *mcasp,
 	return error_ppm;
 }
 
+static uint mcasp_clocks_per_slot(struct davinci_mcasp *mcasp, uint width)
+{
+	if (mcasp->op_mode == DAVINCI_MCASP_DIT_MODE)
+		return 64;
+	if (mcasp->slot_width)
+		return mcasp->slot_width;
+	return width;
+}
+
 static int davinci_mcasp_hw_params(struct snd_pcm_substream *substream,
 					struct snd_pcm_hw_params *params,
 					struct snd_soc_dai *cpu_dai)
@@ -1068,12 +1072,9 @@ static int davinci_mcasp_hw_params(struct snd_pcm_substream *substream,
 	 * the machine driver, we need to calculate the ratio.
 	 */
 	if (mcasp->bclk_master && mcasp->bclk_div == 0 && mcasp->sysclk_freq) {
-		int slots = mcasp->tdm_slots;
-		int rate = params_rate(params);
-		int sbits = params_width(params);
-
-		if (mcasp->slot_width)
-			sbits = mcasp->slot_width;
+		uint slots = mcasp->tdm_slots;
+		uint rate = params_rate(params);
+		uint sbits = mcasp_clocks_per_slot(mcasp, params_width(params));
 
 		davinci_mcasp_calc_clk_div(mcasp, rate * sbits * slots, true);
 	}
@@ -1103,11 +1104,13 @@ static int davinci_mcasp_hw_params(struct snd_pcm_substream *substream,
 		word_length = 16;
 		break;
 
-	case SNDRV_PCM_FORMAT_U24_3LE:
-	case SNDRV_PCM_FORMAT_S24_3LE:
-		word_length = 24;
+	case SNDRV_PCM_FORMAT_U20_3LE:
+	case SNDRV_PCM_FORMAT_S20_3LE:
+		word_length = 20;
 		break;
 
+	case SNDRV_PCM_FORMAT_U24_3LE:
+	case SNDRV_PCM_FORMAT_S24_3LE:
 	case SNDRV_PCM_FORMAT_U24_LE:
 	case SNDRV_PCM_FORMAT_S24_LE:
 		word_length = 24;
@@ -1169,14 +1172,11 @@ static int davinci_mcasp_hw_rule_rate(struct snd_pcm_hw_params *params,
 	struct davinci_mcasp_ruledata *rd = rule->private;
 	struct snd_interval *ri =
 		hw_param_interval(params, SNDRV_PCM_HW_PARAM_RATE);
-	int sbits = params_width(params);
-	int slots = rd->mcasp->tdm_slots;
+	uint sbits = mcasp_clocks_per_slot(rd->mcasp, params_width(params));
+	uint slots = rd->mcasp->tdm_slots;
 	struct snd_interval range;
 	int i;
 
-	if (rd->mcasp->slot_width)
-		sbits = rd->mcasp->slot_width;
-
 	snd_interval_any(&range);
 	range.empty = 1;
 
@@ -1223,8 +1223,7 @@ static int davinci_mcasp_hw_rule_format(struct snd_pcm_hw_params *params,
 			uint sbits = snd_pcm_format_width(i);
 			int ppm;
 
-			if (rd->mcasp->slot_width)
-				sbits = rd->mcasp->slot_width;
+			sbits = mcasp_clocks_per_slot(rd->mcasp, sbits);
 
 			ppm = davinci_mcasp_calc_clk_div(rd->mcasp,
 							 sbits * slots * rate,
@@ -1834,7 +1833,9 @@ static int davinci_mcasp_probe(struct platform_device *pdev)
 
 	mcasp->op_mode = pdata->op_mode;
 	/* sanity check for tdm slots parameter */
-	if (mcasp->op_mode == DAVINCI_MCASP_IIS_MODE) {
+	if (mcasp->op_mode == DAVINCI_MCASP_DIT_MODE) {
+		mcasp->tdm_slots = 2;
+	} else {
 		if (pdata->tdm_slots < 2) {
 			dev_err(&pdev->dev, "invalid tdm slots: %d\n",
 				pdata->tdm_slots);
diff --git a/sound/soc/davinci/davinci-mcasp.h b/sound/soc/davinci/davinci-mcasp.h
index afddc80..ecc33cb 100644
--- a/sound/soc/davinci/davinci-mcasp.h
+++ b/sound/soc/davinci/davinci-mcasp.h
@@ -82,12 +82,12 @@
 /* Serializer n Control Register */
 #define DAVINCI_MCASP_XRSRCTL_BASE_REG	0x180
 #define DAVINCI_MCASP_XRSRCTL_REG(n)	(DAVINCI_MCASP_XRSRCTL_BASE_REG + \
-						(n << 2))
+						((n) << 2))
 
 /* Transmit Buffer for Serializer n */
-#define DAVINCI_MCASP_TXBUF_REG(n)	(0x200 + (n << 2))
+#define DAVINCI_MCASP_TXBUF_REG(n)	(0x200 + ((n) << 2))
 /* Receive Buffer for Serializer n */
-#define DAVINCI_MCASP_RXBUF_REG(n)	(0x280 + (n << 2))
+#define DAVINCI_MCASP_RXBUF_REG(n)	(0x280 + ((n) << 2))
 
 /* McASP FIFO Registers */
 #define DAVINCI_MCASP_V2_AFIFO_BASE	(0x1010)
@@ -109,7 +109,7 @@
 /*
  * DAVINCI_MCASP_PFUNC_REG - Pin Function / GPIO Enable Register Bits
  */
-#define AXR(n)		(1<<n)
+#define AXR(n)		(1<<(n))
 #define PFUNC_AMUTE	BIT(25)
 #define ACLKX		BIT(26)
 #define AHCLKX		BIT(27)
@@ -121,7 +121,7 @@
 /*
  * DAVINCI_MCASP_PDIR_REG - Pin Direction Register Bits
  */
-#define AXR(n)		(1<<n)
+#define AXR(n)		(1<<(n))
 #define PDIR_AMUTE	BIT(25)
 #define ACLKX		BIT(26)
 #define AHCLKX		BIT(27)
@@ -142,22 +142,22 @@
  */
 #define TXROT(val)	(val)
 #define TXSEL		BIT(3)
-#define TXSSZ(val)	(val<<4)
-#define TXPBIT(val)	(val<<8)
-#define TXPAD(val)	(val<<13)
+#define TXSSZ(val)	((val)<<4)
+#define TXPBIT(val)	((val)<<8)
+#define TXPAD(val)	((val)<<13)
 #define TXORD		BIT(15)
-#define FSXDLY(val)	(val<<16)
+#define FSXDLY(val)	((val)<<16)
 
 /*
  * DAVINCI_MCASP_RXFMT_REG - Receive Bitstream Format Register Bits
  */
 #define RXROT(val)	(val)
 #define RXSEL		BIT(3)
-#define RXSSZ(val)	(val<<4)
-#define RXPBIT(val)	(val<<8)
-#define RXPAD(val)	(val<<13)
+#define RXSSZ(val)	((val)<<4)
+#define RXPBIT(val)	((val)<<8)
+#define RXPAD(val)	((val)<<13)
 #define RXORD		BIT(15)
-#define FSRDLY(val)	(val<<16)
+#define FSRDLY(val)	((val)<<16)
 
 /*
  * DAVINCI_MCASP_TXFMCTL_REG -  Transmit Frame Control Register Bits
@@ -165,7 +165,7 @@
 #define FSXPOL		BIT(0)
 #define AFSXE		BIT(1)
 #define FSXDUR		BIT(4)
-#define FSXMOD(val)	(val<<7)
+#define FSXMOD(val)	((val)<<7)
 
 /*
  * DAVINCI_MCASP_RXFMCTL_REG - Receive Frame Control Register Bits
@@ -173,7 +173,7 @@
 #define FSRPOL		BIT(0)
 #define AFSRE		BIT(1)
 #define FSRDUR		BIT(4)
-#define FSRMOD(val)	(val<<7)
+#define FSRMOD(val)	((val)<<7)
 
 /*
  * DAVINCI_MCASP_ACLKXCTL_REG - Transmit Clock Control Register Bits
@@ -229,17 +229,17 @@
  */
 #define LBEN		BIT(0)
 #define LBORD		BIT(1)
-#define LBGENMODE(val)	(val<<2)
+#define LBGENMODE(val)	((val)<<2)
 
 /*
  * DAVINCI_MCASP_TXTDMSLOT_REG - Transmit TDM Slot Register configuration
  */
-#define TXTDMS(n)	(1<<n)
+#define TXTDMS(n)	(1<<(n))
 
 /*
  * DAVINCI_MCASP_RXTDMSLOT_REG - Receive TDM Slot Register configuration
  */
-#define RXTDMS(n)	(1<<n)
+#define RXTDMS(n)	(1<<(n))
 
 /*
  * DAVINCI_MCASP_GBLCTL_REG -  Global Control Register Bits