From bf3a9a5039650352efdb00d18fbfb1481d9e8d9d Mon Sep 17 00:00:00 2001 From: Fabio Rehm Date: Thu, 8 Jan 2015 03:31:01 -0200 Subject: [PATCH] action: Some more tweaks around private networking + new pipework code [GH-298] --- lib/vagrant-lxc/action.rb | 2 + .../action/gc_private_network_bridges.rb | 21 ++ lib/vagrant-lxc/action/private_networks.rb | 29 +- scripts/private-network | 343 ++++++++++++------ 4 files changed, 270 insertions(+), 125 deletions(-) create mode 100644 lib/vagrant-lxc/action/gc_private_network_bridges.rb diff --git a/lib/vagrant-lxc/action.rb b/lib/vagrant-lxc/action.rb index 7ffd4ed..883654c 100644 --- a/lib/vagrant-lxc/action.rb +++ b/lib/vagrant-lxc/action.rb @@ -8,6 +8,7 @@ require 'vagrant-lxc/action/fetch_ip_with_lxc_attach' require 'vagrant-lxc/action/fetch_ip_from_dnsmasq_leases' require 'vagrant-lxc/action/forced_halt' require 'vagrant-lxc/action/forward_ports' +require 'vagrant-lxc/action/gc_private_network_bridges' require 'vagrant-lxc/action/handle_box_metadata' require 'vagrant-lxc/action/prepare_nfs_settings' require 'vagrant-lxc/action/prepare_nfs_valid_ids' @@ -131,6 +132,7 @@ module Vagrant b3.use ForcedHalt end end + b2.use GcPrivateNetworkBridges end end end diff --git a/lib/vagrant-lxc/action/gc_private_network_bridges.rb b/lib/vagrant-lxc/action/gc_private_network_bridges.rb new file mode 100644 index 0000000..d7bb83f --- /dev/null +++ b/lib/vagrant-lxc/action/gc_private_network_bridges.rb @@ -0,0 +1,21 @@ +# sudo ifconfig br1 down && sudo brctl delbr br1 + +module Vagrant + module LXC + module Action + class GcPrivateNetworkBridges + def initialize(app, env) + @app = app + end + + def call(env) + if env[:machine].provider.state.id != :running + puts 'Cleanup bridges!' 
+ end + + @app.call(env) + end + end + end + end +end diff --git a/lib/vagrant-lxc/action/private_networks.rb b/lib/vagrant-lxc/action/private_networks.rb index cbbe576..c0794e7 100644 --- a/lib/vagrant-lxc/action/private_networks.rb +++ b/lib/vagrant-lxc/action/private_networks.rb @@ -26,11 +26,15 @@ module Vagrant container_name = env[:machine].provider.driver.container_name ip = config[:ip] - configure_single_network('br1', container_name, ip) + bridge_ip = config.fetch(:lxc__bridge_ip) { build_bridge_ip(ip) } + bridge = config.fetch(:lxc__bridge_name) # { build_bridge_name(config.fetch(:lxc__bridge_prefix, 'br'), bridge_ip) } + + # TODO: ensure_ip_is_not_in_use! + configure_single_network(bridge, bridge_ip, container_name, ip) end end - def configure_single_network(bridge, container_name, ip) + def configure_single_network(bridge, bridge_ip, container_name, ip) cmd = [ 'sudo', Vagrant::LXC.source_root.join('scripts/private-network').to_s, @@ -38,22 +42,35 @@ module Vagrant container_name, "#{ip}/24" ] - puts cmd.join(' ') - system cmd.join(' ') + execute(cmd) + # TODO: Run only if bridge is not up and move it to the private network script cmd = [ 'sudo', 'ip', 'addr', 'add', - # TODO: This should not be hard coded and has to run once per bridge - "192.168.1.254/24", + "#{bridge_ip}/24", 'dev', bridge ] + execute(cmd) + end + + def execute(cmd) puts cmd.join(' ') system cmd.join(' ') end + + def build_bridge_ip(ip) + ip.sub(/^(\d+\.\d+\.\d+)\.\d+/, '\1.254') + end + + def bridge_name(prefix, bridge_ip) + # if a bridge with the provided ip and prefix exist, get its name and return it + # if no bridges can be found, grab the max bridge number, increment it and return the new name + 'br3' + end end end end diff --git a/scripts/private-network b/scripts/private-network index 5b7c8be..ad94bdc 100755 --- a/scripts/private-network +++ b/scripts/private-network @@ -1,193 +1,298 @@ #!/bin/bash -# This is a snapshot of 
https://github.com/jpetazzo/pipework/blob/edbd33ab49ab0dff0bee46b019055360f325a6e5/pipework -# with docker specifics trimmed out +# Borrowed from https://github.com/jpetazzo/pipework set -e case "$1" in - --wait) - WAIT=1 - ;; + --wait) + WAIT=1 + ;; esac IFNAME=$1 -if [ "$2" == "-i" ]; then + +# default value set further down if not set here +CONTAINER_IFNAME= +if [ "$2" = "-i" ]; then CONTAINER_IFNAME=$3 shift 2 -else - CONTAINER_IFNAME=eth1 fi + GUESTNAME=$2 IPADDR=$3 MACADDR=$4 +if echo $MACADDR | grep -q @ +then + VLAN=$(echo $MACADDR | cut -d@ -f2) + MACADDR=$(echo $MACADDR | cut -d@ -f1) +else + VLAN= +fi + +[ "$IPADDR" ] || [ "$WAIT" ] || { + echo "Syntax:" + echo "pipework [-i containerinterface] /[@default_gateway] [macaddr][@vlan]" + echo "pipework [-i containerinterface] dhcp [macaddr][@vlan]" + echo "pipework --wait [-i containerinterface]" + exit 1 +} + +# First step: determine type of first argument (bridge, physical interface...), skip if --wait set +if [ -z "$WAIT" ]; then + if [ -d /sys/class/net/$IFNAME ] + then + if [ -d /sys/class/net/$IFNAME/bridge ] + then + IFTYPE=bridge + BRTYPE=linux + elif $(which ovs-vsctl >/dev/null 2>&1) && $(ovs-vsctl list-br|grep -q ^$IFNAME$) + then + IFTYPE=bridge + BRTYPE=openvswitch + elif [ $(cat /sys/class/net/$IFNAME/type) -eq 32 ]; # Infiniband IPoIB interface type 32 + then + IFTYPE=ipoib + # The IPoIB kernel module is fussy, set device name to ib0 if not overridden + CONTAINER_IFNAME=${CONTAINER_IFNAME:-ib0} + else IFTYPE=phys + fi + else + # case "$IFNAME" in + # br*) + IFTYPE=bridge + BRTYPE=linux + # ;; + # ovs*) + # if ! $(which ovs-vsctl >/dev/null) + # then + # echo "Need OVS installed on the system to create an ovs bridge" + # exit 1 + # fi + # IFTYPE=bridge + # BRTYPE=openvswitch + # ;; + # *) + # echo "I do not know how to setup interface $IFNAME." 
+ # exit 1 + # ;; + # esac + fi +fi + +# Set the default container interface name to eth1 if not already set +CONTAINER_IFNAME=${CONTAINER_IFNAME:-eth1} + [ "$WAIT" ] && { while ! grep -q ^1$ /sys/class/net/$CONTAINER_IFNAME/carrier 2>/dev/null - do sleep 1 + do sleep 1 done exit 0 } -[ "$IPADDR" ] || { - echo "Syntax:" - echo "pipework [-i containerinterface] /[@default_gateway] [macaddr]" - echo "pipework [-i containerinterface] dhcp [macaddr]" - echo "pipework --wait" - exit 1 +[ $IFTYPE = bridge ] && [ $BRTYPE = linux ] && [ "$VLAN" ] && { + echo "VLAN configuration currently unsupported for Linux bridge." + exit 1 } -# First step: determine type of first argument (bridge, physical interface...) -if [ -d /sys/class/net/$IFNAME ] -then - if [ -d /sys/class/net/$IFNAME/bridge ] - then - IFTYPE=bridge - BRTYPE=linux - elif $(which ovs-vsctl >/dev/null) && $(ovs-vsctl list-br|grep -q ^$IFNAME$) - then - IFTYPE=bridge - BRTYPE=openvswitch - else IFTYPE=phys - fi -else - case "$IFNAME" in - br*) - IFTYPE=bridge - BRTYPE=linux - ;; - *) - echo "I do not know how to setup interface $IFNAME." - exit 1 - ;; - esac -fi +[ $IFTYPE = ipoib ] && [ $MACADDR ] && { + echo "MACADDR configuration unsupported for IPoIB interfaces." + exit 1 +} -# Second step: find the guest +# Second step: find the guest (for now, we only support LXC containers) while read dev mnt fstype options dump fsck do - [ "$fstype" != "cgroup" ] && continue - echo $options | grep -qw devices || continue - CGROUPMNT=$mnt + [ "$fstype" != "cgroup" ] && continue + echo $options | grep -qw devices || continue + CGROUPMNT=$mnt done < /proc/mounts [ "$CGROUPMNT" ] || { - echo "Could not locate cgroup mount point." - exit 1 + echo "Could not locate cgroup mount point." + exit 1 } # Try to find a cgroup matching exactly the provided name. N=$(find "$CGROUPMNT" -name "$GUESTNAME" | wc -l) case "$N" in - 0) - echo "Container $GUESTNAME not found." 
- exit 1 - ;; - 1) - true - ;; - *) - echo "Found more than one container matching $GUESTNAME." - exit 1 - ;; + 0) + # If we didn't find anything, try to lookup the container with Docker. + if which docker >/dev/null + then + RETRIES=3 + while [ $RETRIES -gt 0 ]; do + DOCKERPID=$(docker inspect --format='{{ .State.Pid }}' "$GUESTNAME" || true) + [ "$DOCKERPID" != 0 ] && break + sleep 1 + RETRIES=$((RETRIES - 1)) + done + + [ "$DOCKERPID" = 0 ] && { + echo "Docker inspect returned invalid PID 0" + exit 1 + } + + [ "$DOCKERPID" = "" ] && { + echo "Container $GUESTNAME not found, and unknown to Docker." + exit 1 + } + else + echo "Container $GUESTNAME not found, and Docker not installed." + exit 1 + fi + ;; + 1) + true + ;; + *) + echo "Found more than one container matching $GUESTNAME." + exit 1 + ;; esac if [ "$IPADDR" = "dhcp" ] then - # Check for first available dhcp client - DHCP_CLIENT_LIST="udhcpc dhcpcd dhclient" - for CLIENT in $DHCP_CLIENT_LIST; do - which $CLIENT >/dev/null && { - DHCP_CLIENT=$CLIENT - break + # Check for first available dhcp client + DHCP_CLIENT_LIST="udhcpc dhcpcd dhclient" + for CLIENT in $DHCP_CLIENT_LIST; do + which $CLIENT >/dev/null && { + DHCP_CLIENT=$CLIENT + break + } + done + [ -z $DHCP_CLIENT ] && { + echo "You asked for DHCP; but no DHCP client could be found." + exit 1 } - done - [ -z $DHCP_CLIENT ] && { - echo "You asked for DHCP; but no DHCP client could be found." - exit 1 - } else - # Check if a subnet mask was provided. - echo $IPADDR | grep -q / || { - echo "The IP address should include a netmask." - echo "Maybe you meant $IPADDR/24 ?" - exit 1 - } - # Check if a gateway address was provided. - if echo $IPADDR | grep -q @ - then - GATEWAY=$(echo $IPADDR | cut -d@ -f2) - IPADDR=$(echo $IPADDR | cut -d@ -f1) - else - GATEWAY= - fi + # Check if a subnet mask was provided. + echo $IPADDR | grep -q / || { + echo "The IP address should include a netmask." + echo "Maybe you meant $IPADDR/24 ?" 
+ exit 1 + } + # Check if a gateway address was provided. + if echo $IPADDR | grep -q @ + then + GATEWAY=$(echo $IPADDR | cut -d@ -f2) + IPADDR=$(echo $IPADDR | cut -d@ -f1) + else + GATEWAY= + fi fi -NSPID=$(head -n 1 $(find "$CGROUPMNT" -name "$GUESTNAME" | head -n 1)/tasks) -[ "$NSPID" ] || { - echo "Could not find a process inside container $GUESTNAME." - exit 1 +if [ $DOCKERPID ]; then + NSPID=$DOCKERPID +else + NSPID=$(head -n 1 $(find "$CGROUPMNT" -name "$GUESTNAME" | head -n 1)/tasks) + [ "$NSPID" ] || { + echo "Could not find a process inside container $GUESTNAME." + exit 1 + } +fi + +# Check if an incompatible VLAN device already exists +[ $IFTYPE = phys ] && [ "$VLAN" ] && [ -d /sys/class/net/$IFNAME.$VLAN ] && { + [ -z "$(ip -d link show $IFNAME.$VLAN | grep "vlan.*id $VLAN")" ] && { + echo "$IFNAME.$VLAN already exists but is not a VLAN device for tag $VLAN" + exit 1 + } } [ ! -d /var/run/netns ] && mkdir -p /var/run/netns [ -f /var/run/netns/$NSPID ] && rm -f /var/run/netns/$NSPID ln -s /proc/$NSPID/ns/net /var/run/netns/$NSPID - # Check if we need to create a bridge. [ $IFTYPE = bridge ] && [ ! 
-d /sys/class/net/$IFNAME ] && { - (ip link set $IFNAME type bridge > /dev/null 2>&1) || (brctl addbr $IFNAME) - ip link set $IFNAME up + [ $BRTYPE = linux ] && { + (ip link add dev $IFNAME type bridge > /dev/null 2>&1) || (brctl addbr $IFNAME) + ip link set $IFNAME up + } + [ $BRTYPE = openvswitch ] && { + ovs-vsctl add-br $IFNAME + } } +MTU=$(ip link show $IFNAME | awk '{print $5}') # If it's a bridge, we need to create a veth pair [ $IFTYPE = bridge ] && { - LOCAL_IFNAME=pl$NSPID$CONTAINER_IFNAME - GUEST_IFNAME=pg$NSPID$CONTAINER_IFNAME - ip link add name $LOCAL_IFNAME type veth peer name $GUEST_IFNAME - case "$BRTYPE" in - linux) - (ip link set $LOCAL_IFNAME master $IFNAME > /dev/null 2>&1) || (brctl addif $IFNAME $LOCAL_IFNAME) - ;; - openvswitch) - ovs-vsctl add-port $IFNAME $LOCAL_IFNAME - ;; - esac - ip link set $LOCAL_IFNAME up + LOCAL_IFNAME="v${CONTAINER_IFNAME}pl${NSPID}" + GUEST_IFNAME="v${CONTAINER_IFNAME}pg${NSPID}" + ip link add name $LOCAL_IFNAME mtu $MTU type veth peer name $GUEST_IFNAME mtu $MTU + case "$BRTYPE" in + linux) + (ip link set $LOCAL_IFNAME master $IFNAME > /dev/null 2>&1) || (brctl addif $IFNAME $LOCAL_IFNAME) + ;; + openvswitch) + ovs-vsctl add-port $IFNAME $LOCAL_IFNAME ${VLAN:+"tag=$VLAN"} + ;; + esac + ip link set $LOCAL_IFNAME up +} + +# Note: if no container interface name was specified, pipework will default to ib0 +# Note: no macvlan subinterface or ethernet bridge can be created against an +# ipoib interface. Infiniband is not ethernet. ipoib is an IP layer for it. +# To provide additional ipoib interfaces to containers use SR-IOV and pipework +# to assign them. +[ $IFTYPE = ipoib ] && { + GUEST_IFNAME=$CONTAINER_IFNAME } # If it's a physical interface, create a macvlan subinterface [ $IFTYPE = phys ] && { - GUEST_IFNAME=ph$NSPID$CONTAINER_IFNAME - ip link add link $IFNAME dev $GUEST_IFNAME type macvlan mode bridge - ip link set $IFNAME up + [ "$VLAN" ] && { + [ ! 
-d /sys/class/net/$IFNAME.$VLAN ] && { + ip link add link $IFNAME name $IFNAME.$VLAN mtu $MTU type vlan id $VLAN + } + + ip link set $IFNAME up + IFNAME=$IFNAME.$VLAN + } + GUEST_IFNAME=ph$NSPID$CONTAINER_IFNAME + ip link add link $IFNAME dev $GUEST_IFNAME mtu $MTU type macvlan mode bridge + ip link set $IFNAME up } ip link set $GUEST_IFNAME netns $NSPID ip netns exec $NSPID ip link set $GUEST_IFNAME name $CONTAINER_IFNAME -[ "$MACADDR" ] && ip netns exec $NSPID ip link set $CONTAINER_IFNAME address $MACADDR +[ "$MACADDR" ] && ip netns exec $NSPID ip link set dev $CONTAINER_IFNAME address $MACADDR if [ "$IPADDR" = "dhcp" ] then - [ $DHCP_CLIENT = "udhcpc" ] && ip netns exec $NSPID $DHCP_CLIENT -qi $CONTAINER_IFNAME - [ $DHCP_CLIENT = "dhclient" ] && ip netns exec $NSPID $DHCP_CLIENT $CONTAINER_IFNAME - [ $DHCP_CLIENT = "dhcpcd" ] && ip netns exec $NSPID $DHCP_CLIENT -q $CONTAINER_IFNAME + [ $DHCP_CLIENT = "udhcpc" ] && ip netns exec $NSPID $DHCP_CLIENT -qi $CONTAINER_IFNAME -x hostname:$GUESTNAME + if [ $DHCP_CLIENT = "dhclient" ] + then + # kill dhclient after get ip address to prevent device be used after container close + ip netns exec $NSPID $DHCP_CLIENT -pf "/var/run/dhclient.$NSPID.pid" $CONTAINER_IFNAME + kill "$(cat "/var/run/dhclient.$NSPID.pid")" + rm "/var/run/dhclient.$NSPID.pid" + fi + [ $DHCP_CLIENT = "dhcpcd" ] && ip netns exec $NSPID $DHCP_CLIENT -q $CONTAINER_IFNAME -h $GUESTNAME else - ip netns exec $NSPID ip addr add $IPADDR dev $CONTAINER_IFNAME - [ "$GATEWAY" ] && { - ip netns exec $NSPID ip route delete default >/dev/null 2>&1 && true - } - ip netns exec $NSPID ip link set $CONTAINER_IFNAME up - [ "$GATEWAY" ] && { - ip netns exec $NSPID ip route replace default via $GATEWAY - } + ip netns exec $NSPID ip addr add $IPADDR dev $CONTAINER_IFNAME + [ "$GATEWAY" ] && { + ip netns exec $NSPID ip route delete default >/dev/null 2>&1 && true + } + ip netns exec $NSPID ip link set $CONTAINER_IFNAME up + [ "$GATEWAY" ] && { + ip netns exec $NSPID ip 
route get $GATEWAY >/dev/null 2>&1 || \ + ip netns exec $NSPID ip route add $GATEWAY/32 dev $CONTAINER_IFNAME + ip netns exec $NSPID ip route replace default via $GATEWAY + } fi # Give our ARP neighbors a nudge about the new interface if which arping > /dev/null 2>&1 then - IPADDR=$(echo $IPADDR | cut -d/ -f1) - ip netns exec $NSPID arping -c 1 -A -I $CONTAINER_IFNAME $IPADDR > /dev/null 2>&1 + IPADDR=$(echo $IPADDR | cut -d/ -f1) + ip netns exec $NSPID arping -c 1 -A -I $CONTAINER_IFNAME $IPADDR > /dev/null 2>&1 || true else - echo "Warning: arping not found; interface may not be immediately reachable" + echo "Warning: arping not found; interface may not be immediately reachable" fi + +# Remove NSPID to avoid `ip netns` catch it. +[ -f /var/run/netns/$NSPID ] && rm -f /var/run/netns/$NSPID exit 0