diff --git a/daemon/execdriver/lxc/driver.go b/daemon/execdriver/lxc/driver.go
index 6ee7f3c1dd..3fe44202ac 100644
--- a/daemon/execdriver/lxc/driver.go
+++ b/daemon/execdriver/lxc/driver.go
@@ -5,6 +5,7 @@ import (
 	"github.com/dotcloud/docker/daemon/execdriver"
 	"github.com/dotcloud/docker/pkg/cgroups"
 	"github.com/dotcloud/docker/pkg/label"
+	"github.com/dotcloud/docker/pkg/libcontainer/security/restrict"
 	"github.com/dotcloud/docker/pkg/system"
 	"github.com/dotcloud/docker/utils"
 	"io/ioutil"
@@ -35,6 +36,10 @@ func init() {
 			return err
 		}
 
+		if err := restrict.Restrict("/", "/empty"); err != nil {
+			return err
+		}
+
 		if err := setupCapabilities(args); err != nil {
 			return err
 		}
diff --git a/daemon/execdriver/lxc/lxc_template.go b/daemon/execdriver/lxc/lxc_template.go
index bc94e7a19d..03d32e72b5 100644
--- a/daemon/execdriver/lxc/lxc_template.go
+++ b/daemon/execdriver/lxc/lxc_template.go
@@ -82,15 +82,12 @@ lxc.pivotdir = lxc_putold
 
 # NOTICE: These mounts must be applied within the namespace
 
-#  WARNING: procfs is a known attack vector and should probably be disabled
-#           if your userspace allows it. eg. see http://blog.zx2c4.com/749
+# WARNING: mounting procfs and/or sysfs read-write is a known attack vector.
+# See e.g. http://blog.zx2c4.com/749 and http://bit.ly/T9CkqJ
+# We mount them read-write here, but later, dockerinit will call the Restrict() function to remount them read-only.
+# We cannot mount them directly read-only, because that would prevent loading AppArmor profiles.
 lxc.mount.entry = proc {{escapeFstabSpaces $ROOTFS}}/proc proc nosuid,nodev,noexec 0 0
-
-# WARNING: sysfs is a known attack vector and should probably be disabled
-# if your userspace allows it. eg. see http://bit.ly/T9CkqJ
-{{if .Privileged}}
 lxc.mount.entry = sysfs {{escapeFstabSpaces $ROOTFS}}/sys sysfs nosuid,nodev,noexec 0 0
-{{end}}
 
 {{if .Tty}}
 lxc.mount.entry = {{.Console}} {{escapeFstabSpaces $ROOTFS}}/dev/console none bind,rw 0 0
@@ -111,14 +108,14 @@ lxc.mount.entry = {{$value.Source}} {{escapeFstabSpaces $ROOTFS}}/{{escapeFstabS
 {{if .AppArmor}}
 lxc.aa_profile = unconfined
 {{else}}
-# not unconfined
+# Let AppArmor normal confinement take place (i.e., not unconfined)
 {{end}}
 {{else}}
-# restrict access to proc
-lxc.mount.entry = {{.RestrictionSource}} {{escapeFstabSpaces $ROOTFS}}/proc/sys none bind,ro 0 0
-lxc.mount.entry = {{.RestrictionSource}} {{escapeFstabSpaces $ROOTFS}}/proc/irq none bind,ro 0 0
-lxc.mount.entry = {{.RestrictionSource}} {{escapeFstabSpaces $ROOTFS}}/proc/acpi none bind,ro 0 0
-lxc.mount.entry = {{escapeFstabSpaces $ROOTFS}}/dev/null {{escapeFstabSpaces $ROOTFS}}/proc/sysrq-trigger none bind,ro 0 0
+# Restrict access to some stuff in /proc. Note that dockerinit remounts /proc
+# read-only later on, so we don't need to bother about things that are just
+# dangerous to write to (like sysrq-trigger). Also, recent kernels won't let a
+# container peek into /proc/kcore, but let's cater for people who might run
+# Docker on older kernels. Just in case.
 lxc.mount.entry = {{escapeFstabSpaces $ROOTFS}}/dev/null {{escapeFstabSpaces $ROOTFS}}/proc/kcore none bind,ro 0 0
 {{end}}
 
diff --git a/daemon/execdriver/native/create.go b/daemon/execdriver/native/create.go
index 00e6fc4b26..6f663f916e 100644
--- a/daemon/execdriver/native/create.go
+++ b/daemon/execdriver/native/create.go
@@ -84,8 +84,6 @@ func (d *driver) setPrivileged(container *libcontainer.Container) error {
 	}
 	container.Cgroups.DeviceAccess = true
 
-	// add sysfs as a mount for privileged containers
-	container.Mounts = append(container.Mounts, libcontainer.Mount{Type: "sysfs"})
 	delete(container.Context, "restriction_path")
 
 	if apparmor.IsEnabled() {
diff --git a/integration-cli/docker_cli_run_test.go b/integration-cli/docker_cli_run_test.go
index 83867267ae..b9737feeea 100644
--- a/integration-cli/docker_cli_run_test.go
+++ b/integration-cli/docker_cli_run_test.go
@@ -725,24 +725,46 @@ func TestUnPrivilegedCannotMount(t *testing.T) {
 	logDone("run - test un-privileged cannot mount")
 }
 
-func TestSysNotAvaliableInNonPrivilegedContainers(t *testing.T) {
-	cmd := exec.Command(dockerBinary, "run", "busybox", "ls", "/sys/kernel")
+func TestSysNotWritableInNonPrivilegedContainers(t *testing.T) {
+	cmd := exec.Command(dockerBinary, "run", "busybox", "touch", "/sys/kernel/profiling")
 	if code, err := runCommand(cmd); err == nil || code == 0 {
-		t.Fatal("sys should not be available in a non privileged container")
+		t.Fatal("sys should not be writable in a non privileged container")
 	}
 
 	deleteAllContainers()
 
-	logDone("run - sys not avaliable in non privileged container")
+	logDone("run - sys not writable in non privileged container")
 }
 
-func TestSysAvaliableInPrivilegedContainers(t *testing.T) {
-	cmd := exec.Command(dockerBinary, "run", "--privileged", "busybox", "ls", "/sys/kernel")
+func TestSysWritableInPrivilegedContainers(t *testing.T) {
+	cmd := exec.Command(dockerBinary, "run", "--privileged", "busybox", "touch", "/sys/kernel/profiling")
 	if code, err := runCommand(cmd); err != nil || code != 0 {
-		t.Fatalf("sys should be available in privileged container")
+		t.Fatalf("sys should be writable in privileged container")
 	}
 
 	deleteAllContainers()
 
-	logDone("run - sys avaliable in privileged container")
+	logDone("run - sys writable in privileged container")
+}
+
+// TestProcNotWritableInNonPrivilegedContainers checks that `touch /proc/sysrq-trigger` fails in a container run without --privileged.
+func TestProcNotWritableInNonPrivilegedContainers(t *testing.T) {
+	// Deferred so the container is removed even when the assertion below aborts the test.
+	defer deleteAllContainers()
+	cmd := exec.Command(dockerBinary, "run", "busybox", "touch", "/proc/sysrq-trigger")
+	if code, err := runCommand(cmd); err == nil || code == 0 {
+		t.Fatal("proc should not be writable in a non privileged container")
+	}
+	logDone("run - proc not writable in non privileged container")
+}
+
+// TestProcWritableInPrivilegedContainers checks that `touch /proc/sysrq-trigger` succeeds in a container run with --privileged.
+func TestProcWritableInPrivilegedContainers(t *testing.T) {
+	// Deferred so the container is removed even when the assertion below aborts the test.
+	defer deleteAllContainers()
+	cmd := exec.Command(dockerBinary, "run", "--privileged", "busybox", "touch", "/proc/sysrq-trigger")
+	if code, err := runCommand(cmd); err != nil || code != 0 {
+		t.Fatal("proc should be writable in privileged container")
+	}
+	logDone("run - proc writable in privileged container")
 }
diff --git a/pkg/libcontainer/mount/init.go b/pkg/libcontainer/mount/init.go
index 735970cded..cc3ce2158e 100644
--- a/pkg/libcontainer/mount/init.go
+++ b/pkg/libcontainer/mount/init.go
@@ -11,7 +11,6 @@ import (
 	"github.com/dotcloud/docker/pkg/label"
 	"github.com/dotcloud/docker/pkg/libcontainer"
 	"github.com/dotcloud/docker/pkg/libcontainer/mount/nodes"
-	"github.com/dotcloud/docker/pkg/libcontainer/security/restrict"
 	"github.com/dotcloud/docker/pkg/system"
 )
 
@@ -51,11 +50,6 @@ func InitializeMountNamespace(rootfs, console string, container *libcontainer.Co
 	if err := nodes.CopyN(rootfs, nodes.DefaultNodes); err != nil {
 		return fmt.Errorf("copy dev nodes %s", err)
 	}
-	if restrictionPath := container.Context["restriction_path"]; restrictionPath != "" {
-		if err := restrict.Restrict(rootfs, restrictionPath); err != nil {
-			return fmt.Errorf("restrict %s", err)
-		}
-	}
 	if err := SetupPtmx(rootfs, console, container.Context["mount_label"]); err != nil {
 		return err
 	}
@@ -124,10 +118,11 @@ func setupBindmounts(rootfs string, bindMounts libcontainer.Mounts) error {
 }
 
 // TODO: this is crappy right now and should be cleaned up with a better way of handling system and
-// standard bind mounts allowing them to be more dymanic
+// standard bind mounts allowing them to be more dynamic
 func newSystemMounts(rootfs, mountLabel string, mounts libcontainer.Mounts) []mount {
 	systemMounts := []mount{
 		{source: "proc", path: filepath.Join(rootfs, "proc"), device: "proc", flags: defaultMountFlags},
+		{source: "sysfs", path: filepath.Join(rootfs, "sys"), device: "sysfs", flags: defaultMountFlags},
 	}
 
 	if len(mounts.OfType("devtmpfs")) == 1 {
@@ -138,8 +133,5 @@ func newSystemMounts(rootfs, mountLabel string, mounts libcontainer.Mounts) []mo
 		mount{source: "devpts", path: filepath.Join(rootfs, "dev", "pts"), device: "devpts", flags: syscall.MS_NOSUID | syscall.MS_NOEXEC, data: label.FormatMountLabel("newinstance,ptmxmode=0666,mode=620,gid=5", mountLabel)},
 	)
 
-	if len(mounts.OfType("sysfs")) == 1 {
-		systemMounts = append(systemMounts, mount{source: "sysfs", path: filepath.Join(rootfs, "sys"), device: "sysfs", flags: defaultMountFlags})
-	}
 	return systemMounts
 }
diff --git a/pkg/libcontainer/nsinit/init.go b/pkg/libcontainer/nsinit/init.go
index faec12af32..bafb877cd9 100644
--- a/pkg/libcontainer/nsinit/init.go
+++ b/pkg/libcontainer/nsinit/init.go
@@ -16,6 +16,7 @@ import (
 	"github.com/dotcloud/docker/pkg/libcontainer/mount"
 	"github.com/dotcloud/docker/pkg/libcontainer/network"
 	"github.com/dotcloud/docker/pkg/libcontainer/security/capabilities"
+	"github.com/dotcloud/docker/pkg/libcontainer/security/restrict"
 	"github.com/dotcloud/docker/pkg/libcontainer/utils"
 	"github.com/dotcloud/docker/pkg/system"
 	"github.com/dotcloud/docker/pkg/user"
@@ -68,18 +69,25 @@ func Init(container *libcontainer.Container, uncleanRootfs, consolePath string,
 	if err := system.Sethostname(container.Hostname); err != nil {
 		return fmt.Errorf("sethostname %s", err)
 	}
-	if err := FinalizeNamespace(container); err != nil {
-		return fmt.Errorf("finalize namespace %s", err)
-	}
 
 	runtime.LockOSThread()
 
+	if restrictionPath := container.Context["restriction_path"]; restrictionPath != "" {
+		if err := restrict.Restrict("/", restrictionPath); err != nil {
+			return err
+		}
+	}
+
 	if err := apparmor.ApplyProfile(os.Getpid(), container.Context["apparmor_profile"]); err != nil {
 		return err
 	}
 	if err := label.SetProcessLabel(container.Context["process_label"]); err != nil {
 		return fmt.Errorf("set process label %s", err)
 	}
+
+	if err := FinalizeNamespace(container); err != nil {
+		return fmt.Errorf("finalize namespace %s", err)
+	}
 	return system.Execv(args[0], args[0:], container.Env)
 }
 
diff --git a/pkg/libcontainer/security/restrict/restrict.go b/pkg/libcontainer/security/restrict/restrict.go
index 291d6ca5dc..8c08ea1806 100644
--- a/pkg/libcontainer/security/restrict/restrict.go
+++ b/pkg/libcontainer/security/restrict/restrict.go
@@ -9,43 +9,67 @@ import (
 	"github.com/dotcloud/docker/pkg/system"
 )
 
-const flags = syscall.MS_BIND | syscall.MS_REC | syscall.MS_RDONLY
-
-var restrictions = map[string]string{
-	// dirs
-	"/proc/sys":  "",
-	"/proc/irq":  "",
-	"/proc/acpi": "",
-
-	// files
-	"/proc/sysrq-trigger": "/dev/null",
-	"/proc/kcore":         "/dev/null",
+// "restrictions" are container paths (files, directories, whatever) that have to be masked.
+// maskPath is a "safe" path to be mounted over maskedPath. It can take two special values:
+// - if it is "", then nothing is mounted;
+// - if it is "EMPTY", then an empty directory is mounted instead.
+// If remountRO is true then the maskedPath is remounted read-only (regardless of whether a maskPath was used).
+type restriction struct {
+	maskedPath string
+	maskPath   string
+	remountRO  bool
 }
 
-// Restrict locks down access to many areas of proc
-// by using the asumption that the user does not have mount caps to
-// revert the changes made here
-func Restrict(rootfs, empty string) error {
-	for dest, source := range restrictions {
-		dest = filepath.Join(rootfs, dest)
+var restrictions = []restriction{
+	{"/proc", "", true},
+	{"/sys", "", true},
+	{"/proc/kcore", "/dev/null", false},
+}
 
-		// we don't have a "/dev/null" for dirs so have the requester pass a dir
-		// for us to bind mount
-		switch source {
-		case "":
-			source = empty
-		default:
-			source = filepath.Join(rootfs, source)
-		}
-		if err := system.Mount(source, dest, "bind", flags, ""); err != nil {
-			if os.IsNotExist(err) {
-				continue
+// Restrict applies every entry of restrictions under rootfs (bind-mount masks and read-only remounts), then re-exposes a writable /proc/1/attr.
+// It has to be called while the container still has CAP_SYS_ADMIN (to be able to perform mounts); afterwards, CAP_SYS_ADMIN
+// should be dropped (otherwise the user will be able to revert those changes). "empty" should be the path to an empty directory.
+func Restrict(rootfs, empty string) error {
+	for _, restriction := range restrictions {
+		dest := filepath.Join(rootfs, restriction.maskedPath)
+		if restriction.maskPath != "" {
+			var source string
+			if restriction.maskPath == "EMPTY" {
+				source = empty
+			} else {
+				source = filepath.Join(rootfs, restriction.maskPath)
+			}
+			if err := system.Mount(source, dest, "", syscall.MS_BIND, ""); err != nil {
+				return fmt.Errorf("unable to bind-mount %s over %s: %s", source, dest, err)
 			}
-			return fmt.Errorf("unable to mount %s over %s %s", source, dest, err)
 		}
-		if err := system.Mount("", dest, "bind", flags|syscall.MS_REMOUNT, ""); err != nil {
-			return fmt.Errorf("unable to mount %s over %s %s", source, dest, err)
+		if restriction.remountRO {
+			if err := system.Mount("", dest, "", syscall.MS_REMOUNT|syscall.MS_RDONLY, ""); err != nil {
+				return fmt.Errorf("unable to remount %s readonly: %s", dest, err)
+			}
 		}
 	}
+
+	// This weird trick will allow us to mount /proc read-only, while being able to use AppArmor.
+	// This is because apparently, loading an AppArmor profile requires write access to /proc/1/attr.
+	// So we mount procfs again on .proc (the directory may survive from an earlier start), make it write-able, and bind-mount a subset of it.
+	tmpProcPath := filepath.Join(rootfs, ".proc")
+	if err := os.Mkdir(tmpProcPath, 0700); err != nil && !os.IsExist(err) {
+		return fmt.Errorf("unable to create temporary proc mountpoint %s: %s", tmpProcPath, err)
+	}
+	if err := system.Mount("proc", tmpProcPath, "proc", 0, ""); err != nil {
+		return fmt.Errorf("unable to mount proc on temporary proc mountpoint: %s", err)
+	}
+	if err := system.Mount("proc", tmpProcPath, "", syscall.MS_REMOUNT, ""); err != nil {
+		return fmt.Errorf("unable to remount proc read-write: %s", err)
+	}
+	rwAttrPath := filepath.Join(rootfs, ".proc", "1", "attr")
+	roAttrPath := filepath.Join(rootfs, "proc", "1", "attr")
+	if err := system.Mount(rwAttrPath, roAttrPath, "", syscall.MS_BIND, ""); err != nil {
+		return fmt.Errorf("unable to bind-mount %s on %s: %s", rwAttrPath, roAttrPath, err)
+	}
+	if err := system.Unmount(tmpProcPath, 0); err != nil {
+		return fmt.Errorf("unable to unmount temporary proc filesystem: %s", err)
+	}
 	return nil
 }