mirror of
https://github.com/containers/podman.git
synced 2026-05-19 14:09:01 -04:00
libpod: replace listen with socket+bind for dual-stack port reservation
This commit addresses two concerns. Bind dual stack when hostIP unless it is explicitly specified. Since we use listen(), this change resulted in blocked connections on stacks without matching DNAT rules (e.g. connecting to [::1] on an IPv4-only network) because the TCP handshake on the reservation socket would complete instead of returning ECONNREFUSED allowing the client to fallback to IPv4. Replacing listen() with raw socket() and bind() syscalls fixes this by allowing us to reserve this port without accepting connections; clients get ECONNREFUSED and fall back to IPv4 automatically, as is desired. Fixes: https://github.com/containers/netavark/issues/1338 Co-authored-by: Paul Holzinger <pholzing@redhat.com> Signed-off-by: Danish Prakash <contact@danishpraka.sh>
This commit is contained in:
@@ -3,16 +3,19 @@
|
||||
package libpod
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"net"
|
||||
"os"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/sirupsen/logrus"
|
||||
"go.podman.io/common/libnetwork/types"
|
||||
"go.podman.io/podman/v6/libpod/define"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
// Timeout before declaring that runtime has failed to kill a given
|
||||
@@ -32,9 +35,9 @@ func bindPorts(ports []types.PortMapping) ([]*os.File, error) {
|
||||
var files []*os.File
|
||||
sctpWarning := true
|
||||
for _, port := range ports {
|
||||
isV6 := net.ParseIP(port.HostIP).To4() == nil
|
||||
if port.HostIP == "" {
|
||||
isV6 = false
|
||||
isV6 := false
|
||||
if port.HostIP != "" {
|
||||
isV6 = net.ParseIP(port.HostIP).To4() == nil
|
||||
}
|
||||
protocols := strings.SplitSeq(port.Protocol, ",")
|
||||
for protocol := range protocols {
|
||||
@@ -42,7 +45,7 @@ func bindPorts(ports []types.PortMapping) ([]*os.File, error) {
|
||||
f, err := bindPort(protocol, port.HostIP, port.HostPort+i, isV6, &sctpWarning)
|
||||
if err != nil {
|
||||
// close all open ports in case of early error so we do not
|
||||
// rely garbage collector to close them
|
||||
// rely on the garbage collector to close them
|
||||
for _, f := range files {
|
||||
f.Close()
|
||||
}
|
||||
@@ -57,84 +60,96 @@ func bindPorts(ports []types.PortMapping) ([]*os.File, error) {
|
||||
return files, nil
|
||||
}
|
||||
|
||||
// bindPort reserves a port on the host using socket+bind without listen.
|
||||
// Dual-stack bind by default unless hostIP is specified.
|
||||
func bindPort(protocol, hostIP string, port uint16, isV6 bool, sctpWarning *bool) (*os.File, error) {
|
||||
var file *os.File
|
||||
switch protocol {
|
||||
case "udp":
|
||||
var (
|
||||
addr *net.UDPAddr
|
||||
err error
|
||||
)
|
||||
if isV6 {
|
||||
addr, err = net.ResolveUDPAddr("udp6", fmt.Sprintf("[%s]:%d", hostIP, port))
|
||||
} else {
|
||||
addr, err = net.ResolveUDPAddr("udp4", fmt.Sprintf("%s:%d", hostIP, port))
|
||||
}
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot resolve the UDP address: %w", err)
|
||||
case "tcp", "udp":
|
||||
sockType := unix.SOCK_STREAM
|
||||
if protocol == "udp" {
|
||||
sockType = unix.SOCK_DGRAM
|
||||
}
|
||||
|
||||
proto := "udp4"
|
||||
if isV6 {
|
||||
proto = "udp6"
|
||||
}
|
||||
server, err := net.ListenUDP(proto, addr)
|
||||
domain, sa, err := buildSockAddr(hostIP, port, isV6)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot listen on the UDP port: %w", err)
|
||||
}
|
||||
file, err = server.File()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot get file for UDP socket: %w", err)
|
||||
}
|
||||
// close the listener
|
||||
// note that this does not affect the fd, see the godoc for server.File()
|
||||
err = server.Close()
|
||||
if err != nil {
|
||||
logrus.Warnf("Failed to close connection: %v", err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
case "tcp":
|
||||
var (
|
||||
addr *net.TCPAddr
|
||||
err error
|
||||
)
|
||||
if isV6 {
|
||||
addr, err = net.ResolveTCPAddr("tcp6", fmt.Sprintf("[%s]:%d", hostIP, port))
|
||||
} else {
|
||||
addr, err = net.ResolveTCPAddr("tcp4", fmt.Sprintf("%s:%d", hostIP, port))
|
||||
}
|
||||
fd, err := unix.Socket(domain, sockType|unix.SOCK_CLOEXEC, 0)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot resolve the TCP address: %w", err)
|
||||
// If hostIP == "" and IPv6 is not supported, fall back to IPv4
|
||||
if hostIP == "" && errors.Is(err, unix.EAFNOSUPPORT) {
|
||||
return bindPortV4Fallback(protocol, sockType, port)
|
||||
}
|
||||
return nil, fmt.Errorf("cannot create socket for %s port %d: %w", protocol, port, err)
|
||||
}
|
||||
|
||||
proto := "tcp4"
|
||||
if isV6 {
|
||||
proto = "tcp6"
|
||||
if err := setupSocketOpts(fd, domain, hostIP); err != nil {
|
||||
unix.Close(fd)
|
||||
return nil, err
|
||||
}
|
||||
server, err := net.ListenTCP(proto, addr)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot listen on the TCP port: %w", err)
|
||||
}
|
||||
file, err = server.File()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot get file for TCP socket: %w", err)
|
||||
}
|
||||
// close the listener
|
||||
// note that this does not affect the fd, see the godoc for server.File()
|
||||
err = server.Close()
|
||||
if err != nil {
|
||||
logrus.Warnf("Failed to close connection: %v", err)
|
||||
|
||||
if err := unix.Bind(fd, sa); err != nil {
|
||||
unix.Close(fd)
|
||||
return nil, fmt.Errorf("cannot bind %s port %s: %w", protocol, net.JoinHostPort(hostIP, strconv.FormatUint(uint64(port), 10)), err)
|
||||
}
|
||||
|
||||
return os.NewFile(uintptr(fd), fmt.Sprintf("reservation-%s-%d", protocol, port)), nil
|
||||
|
||||
case "sctp":
|
||||
if *sctpWarning {
|
||||
logrus.Info("Port reservation for SCTP is not supported")
|
||||
*sctpWarning = false
|
||||
}
|
||||
return nil, nil
|
||||
default:
|
||||
return nil, fmt.Errorf("unknown protocol %s", protocol)
|
||||
}
|
||||
return file, nil
|
||||
}
|
||||
|
||||
func buildSockAddr(hostIP string, port uint16, isV6 bool) (int, unix.Sockaddr, error) {
|
||||
// default behaviour when hostIP == "" is to bind dual-stack
|
||||
// if hostIP != ""; determine the stack using the address specified
|
||||
if hostIP == "" {
|
||||
return unix.AF_INET6, &unix.SockaddrInet6{Port: int(port)}, nil
|
||||
}
|
||||
ip := net.ParseIP(hostIP)
|
||||
if ip == nil {
|
||||
return 0, nil, fmt.Errorf("invalid IP address: %s", hostIP)
|
||||
}
|
||||
if isV6 {
|
||||
sa := &unix.SockaddrInet6{Port: int(port)}
|
||||
copy(sa.Addr[:], ip.To16())
|
||||
return unix.AF_INET6, sa, nil
|
||||
}
|
||||
sa := &unix.SockaddrInet4{Port: int(port)}
|
||||
copy(sa.Addr[:], ip.To4())
|
||||
return unix.AF_INET, sa, nil
|
||||
}
|
||||
|
||||
func setupSocketOpts(fd, domain int, hostIP string) error {
|
||||
if domain == unix.AF_INET6 {
|
||||
v6only := 1
|
||||
if hostIP == "" {
|
||||
v6only = 0
|
||||
}
|
||||
if err := unix.SetsockoptInt(fd, unix.IPPROTO_IPV6, unix.IPV6_V6ONLY, v6only); err != nil {
|
||||
return fmt.Errorf("cannot set IPV6_V6ONLY: %w", err)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func bindPortV4Fallback(protocol string, sockType int, port uint16) (*os.File, error) {
|
||||
fd, err := unix.Socket(unix.AF_INET, sockType|unix.SOCK_CLOEXEC, 0)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot create socket for %s port %d: %w", protocol, port, err)
|
||||
}
|
||||
if err := unix.Bind(fd, &unix.SockaddrInet4{Port: int(port)}); err != nil {
|
||||
unix.Close(fd)
|
||||
return nil, fmt.Errorf("cannot bind %s port %s: %w", protocol, net.JoinHostPort("", strconv.FormatUint(uint64(port), 10)), err)
|
||||
}
|
||||
return os.NewFile(uintptr(fd), fmt.Sprintf("reservation-%s-%d", protocol, port)), nil
|
||||
}
|
||||
|
||||
func getOCIRuntimeError(name, runtimeMsg string) error {
|
||||
|
||||
@@ -1343,7 +1343,7 @@ EOF
|
||||
|
||||
echo "$_LOG_PROMPT journalctl -u $QUADLET_SERVICE_NAME"
|
||||
run -0 journalctl -eu $QUADLET_SERVICE_NAME
|
||||
assert "$output" =~ "$port: bind: address already in use" "journal contains the real podman start error"
|
||||
assert "$output" =~ "$port.*ddress already in use" "journal contains the real podman start error"
|
||||
|
||||
kill "$socat_pid"
|
||||
}
|
||||
|
||||
@@ -341,9 +341,11 @@ function run_pod_etc_hosts_test(){
|
||||
# flush the firewall rule here to break port forwarding
|
||||
nft delete table inet netavark || true
|
||||
|
||||
# check that we cannot curl (timeout after 1 sec)
|
||||
# check that we cannot curl: expect either connection refused (7)
|
||||
# or timeout (28) depending on whether the port reservation socket
|
||||
# is in listen state
|
||||
run curl --max-time 1 -s $SERVER/index.txt
|
||||
assert $status -eq 28 "curl did not time out"
|
||||
assert "$status" -ne 0 "curl should fail after flushing nftables"
|
||||
fi
|
||||
|
||||
# reload the network to recreate the nftables rules
|
||||
@@ -1138,4 +1140,22 @@ EOF
|
||||
run_podman network rm $netname2
|
||||
}
|
||||
|
||||
# Refer https://github.com/containers/netavark/issues/1338
|
||||
# bats test_tags=ci:parallel
|
||||
@test "podman run - dual-stack conflicts with explicit wildcards" {
|
||||
myport=$(random_free_port)
|
||||
cname1="c1-$(safename)"
|
||||
|
||||
run_podman run -d --name $cname1 -p $myport:8080 $IMAGE sleep inf
|
||||
cid1="$output"
|
||||
|
||||
run_podman 126 run --rm -p 0.0.0.0:$myport:8080 $IMAGE true
|
||||
assert "$output" =~ "ddress already in use" "explicit IPv4 wildcard should conflict with dual-stack"
|
||||
|
||||
run_podman 126 run --rm -p [::]:$myport:8080 $IMAGE true
|
||||
assert "$output" =~ "ddress already in use" "explicit IPv6 wildcard should conflict with dual-stack"
|
||||
|
||||
run_podman rm -f -t0 $cid1
|
||||
}
|
||||
|
||||
# vim: filetype=sh
|
||||
|
||||
@@ -30,58 +30,6 @@ function skip_if_no_ipv6() {
|
||||
fi
|
||||
}
|
||||
|
||||
### procfs access ##############################################################
|
||||
|
||||
# ipv6_to_procfs() - RFC 5952 IPv6 address text representation to procfs format
|
||||
# $1: Address in any notation described by RFC 5952
|
||||
function ipv6_to_procfs() {
|
||||
local addr="${1}"
|
||||
|
||||
# Add leading zero if missing
|
||||
case ${addr} in
|
||||
"::"*) addr=0"${addr}" ;;
|
||||
esac
|
||||
|
||||
# Double colon can mean any number of all-zero fields. Expand to fill
|
||||
# as many colons as are missing. (This will not be a valid IPv6 form,
|
||||
# but we don't need it for long). E.g., 0::1 -> 0:::::::1
|
||||
case ${addr} in
|
||||
*"::"*)
|
||||
# All the colons in the address
|
||||
local colons
|
||||
colons=$(tr -dc : <<<$addr)
|
||||
# subtract those from a string of eight colons; this gives us
|
||||
# a string of two to six colons...
|
||||
local pad
|
||||
pad=$(sed -e "s/$colons//" <<<":::::::")
|
||||
# ...which we then inject in place of the double colon.
|
||||
addr=$(sed -e "s/::/::$pad/" <<<$addr)
|
||||
;;
|
||||
esac
|
||||
|
||||
# Print as a contiguous string of zero-filled 16-bit words
|
||||
# (The additional ":" below is needed because 'read -d x' actually
|
||||
# means "x is a TERMINATOR, not a delimiter")
|
||||
local group
|
||||
while read -d : group; do
|
||||
printf "%04X" "0x${group:-0}"
|
||||
done <<<"${addr}:"
|
||||
}
|
||||
|
||||
# __ipv4_to_procfs() - Print bytes in hexadecimal notation reversing arguments
|
||||
# $@: IPv4 address as separate bytes
|
||||
function __ipv4_to_procfs() {
|
||||
printf "%02X%02X%02X%02X" ${4} ${3} ${2} ${1}
|
||||
}
|
||||
|
||||
# ipv4_to_procfs() - IPv4 address representation to big-endian procfs format
|
||||
# $1: Text representation of IPv4 address
|
||||
function ipv4_to_procfs() {
|
||||
IFS='.' read -r o1 o2 o3 o4 <<< $1
|
||||
__ipv4_to_procfs $o1 $o2 $o3 $o4
|
||||
}
|
||||
|
||||
|
||||
### Addresses, Routes, Links ###################################################
|
||||
|
||||
# ipv4_get_addr_global() - Print first global IPv4 address reported by netlink
|
||||
@@ -259,7 +207,7 @@ function unreserve_port() {
|
||||
local port=$1
|
||||
|
||||
local lockfile=$PORT_LOCK_DIR/$port
|
||||
-e $lockfile || die "Cannot unreserve non-reserved port $port"
|
||||
test -e $lockfile || die "Cannot unreserve non-reserved port $port"
|
||||
assert "$(< $lockfile)" = "$BATS_SUITE_TEST_NUMBER" \
|
||||
"Port $port is not reserved by this test"
|
||||
rm -f $lockfile
|
||||
@@ -348,32 +296,10 @@ function port_is_bound() {
|
||||
local proto="tcp"
|
||||
fi
|
||||
|
||||
# /proc/net/tcp is insufficient: it does not show some rootless ports.
|
||||
# ss does, so check it first.
|
||||
run ss -${proto:0:1}nlH sport = $port
|
||||
if [[ -n "$output" ]]; then
|
||||
return
|
||||
fi
|
||||
|
||||
port=$(printf %04X ${port})
|
||||
case "${address}" in
|
||||
*":"*)
|
||||
grep -e "^[^:]*: $(ipv6_to_procfs "${address}"):${port} .*" \
|
||||
-e "^[^:]*: $(ipv6_to_procfs "::0"):${port} .*" \
|
||||
-q "/proc/net/${proto}6"
|
||||
;;
|
||||
*"."*)
|
||||
grep -e "^[^:]*: $(ipv4_to_procfs "${address}"):${port}" \
|
||||
-e "^[^:]*: $(ipv4_to_procfs "0.0.0.0"):${port}" \
|
||||
-e "^[^:]*: $(ipv4_to_procfs "127.0.0.1"):${port}" \
|
||||
-q "/proc/net/${proto}"
|
||||
;;
|
||||
*)
|
||||
# No address: check both IPv4 and IPv6, for any bound address
|
||||
grep "^[^:]*: [^:]*:${port} .*" -q "/proc/net/${proto}6" || \
|
||||
grep "^[^:]*: [^:]*:${port} .*" -q "/proc/net/${proto}"
|
||||
;;
|
||||
esac
|
||||
# Use ss to check the local ports
|
||||
run -0 ss -${proto:0:1}nlH state all sport = $port
|
||||
# grep for exact address:port match or for "*:port" which means the port is bound to all addresses and hence bound for any address
|
||||
grep -q "$address:$port" <<<"$output" || grep -q "*:$port" <<<"$output"
|
||||
}
|
||||
|
||||
# port_is_free() - Check if TCP or UDP port is free to bind for a given address
|
||||
|
||||
@@ -232,23 +232,6 @@ check_result "$found" "16700" "random_free_port"
|
||||
|
||||
# END random_free_port
|
||||
###############################################################################
|
||||
# BEGIN ipv6_to_procfs
|
||||
|
||||
# Table of IPv6 short forms and their procfs equivalents. For readability,
|
||||
# spaces separate each 16-bit word. Spaces are removed when testing.
|
||||
table="
|
||||
2b06::1 | 2B06 0000 0000 0000 0000 0000 0000 0001
|
||||
::1 | 0000 0000 0000 0000 0000 0000 0000 0001
|
||||
0::1 | 0000 0000 0000 0000 0000 0000 0000 0001
|
||||
"
|
||||
|
||||
while read shortform expect; do
|
||||
actual=$(ipv6_to_procfs $shortform)
|
||||
check_result "$actual" "${expect// }" "ipv6_to_procfs $shortform"
|
||||
done < <(parse_table "$table")
|
||||
|
||||
# END ipv6_to_procfs
|
||||
###############################################################################
|
||||
# BEGIN subnet_in_use ... because that's complicated
|
||||
|
||||
# Override ip command
|
||||
|
||||
Reference in New Issue
Block a user