network: forget IPv4 non-local routes when an interface went down (#35099)

Fixes #35047.
This commit is contained in:
Yu Watanabe
2024-11-12 01:07:43 +09:00
committed by GitHub
9 changed files with 180 additions and 25 deletions

View File

@@ -1895,6 +1895,9 @@ static int link_admin_state_up(Link *link) {
static int link_admin_state_down(Link *link) {
assert(link);
link_forget_nexthops(link);
link_forget_routes(link);
if (!link->network)
return 0;

View File

@@ -485,31 +485,23 @@ static void log_nexthop_debug(const NextHop *nexthop, const char *str, Manager *
yes_no(nexthop->blackhole), strna(group), strna(flags));
}
static int nexthop_remove_dependents(NextHop *nexthop, Manager *manager) {
int r = 0;
static void nexthop_forget_dependents(NextHop *nexthop, Manager *manager) {
assert(nexthop);
assert(manager);
/* If a nexthop is removed, the kernel silently removes nexthops and routes that depend on the
* removed nexthop. Let's remove them for safety (though, they are already removed in the kernel,
* hence that should fail), and forget them. */
void *id;
SET_FOREACH(id, nexthop->nexthops) {
NextHop *nh;
if (nexthop_get_by_id(manager, PTR_TO_UINT32(id), &nh) < 0)
continue;
RET_GATHER(r, nexthop_remove(nh, manager));
}
/* If a nexthop is removed, the kernel silently removes routes that depend on the removed nexthop.
* Let's forget them. */
Route *route;
SET_FOREACH(route, nexthop->routes)
RET_GATHER(r, route_remove(route, manager));
SET_FOREACH(route, nexthop->routes) {
Request *req;
if (route_get_request(manager, route, &req) >= 0)
route_enter_removed(req->userdata);
return r;
route_enter_removed(route);
log_route_debug(route, "Forgetting silently removed", manager);
route_detach(route);
}
}
static int nexthop_remove_handler(sd_netlink *rtnl, sd_netlink_message *m, RemoveRequest *rreq) {
@@ -527,7 +519,7 @@ static int nexthop_remove_handler(sd_netlink *rtnl, sd_netlink_message *m, Remov
(r == -ENOENT || !nexthop->manager) ? LOG_DEBUG : LOG_WARNING,
r, "Could not drop nexthop, ignoring");
(void) nexthop_remove_dependents(nexthop, manager);
nexthop_forget_dependents(nexthop, manager);
if (nexthop->manager) {
/* If the nexthop cannot be removed, then assume the nexthop is already removed. */
@@ -925,6 +917,61 @@ int link_drop_nexthops(Link *link, bool only_static) {
return r;
}
static void nexthop_forget_one(NextHop *nexthop) {
assert(nexthop);
assert(nexthop->manager);
Request *req;
if (nexthop_get_request_by_id(nexthop->manager, nexthop->id, &req) >= 0)
route_enter_removed(req->userdata);
nexthop_enter_removed(nexthop);
log_nexthop_debug(nexthop, "Forgetting silently removed", nexthop->manager);
nexthop_forget_dependents(nexthop, nexthop->manager);
nexthop_detach(nexthop);
}
void link_forget_nexthops(Link *link) {
assert(link);
assert(link->manager);
assert(link->ifindex > 0);
assert(!FLAGS_SET(link->flags, IFF_UP));
/* See comments in link_forget_routes(). */
/* Remove all IPv4 nexthops. */
NextHop *nexthop;
HASHMAP_FOREACH(nexthop, link->manager->nexthops_by_id) {
if (nexthop->ifindex != link->ifindex)
continue;
if (nexthop->family != AF_INET)
continue;
nexthop_forget_one(nexthop);
}
/* Remove all group nexthops their all members are removed in the above. */
HASHMAP_FOREACH(nexthop, link->manager->nexthops_by_id) {
if (hashmap_isempty(nexthop->group))
continue;
/* Update group members. */
struct nexthop_grp *nhg;
HASHMAP_FOREACH(nhg, nexthop->group) {
if (nexthop_get_by_id(nexthop->manager, nhg->id, NULL) >= 0)
continue;
assert_se(hashmap_remove(nexthop->group, UINT32_TO_PTR(nhg->id)) == nhg);
free(nhg);
}
if (!hashmap_isempty(nexthop->group))
continue; /* At least one group member still exists. */
nexthop_forget_one(nexthop);
}
}
static int nexthop_update_group(NextHop *nexthop, sd_netlink_message *message) {
_cleanup_hashmap_free_free_ Hashmap *h = NULL;
_cleanup_free_ struct nexthop_grp *group = NULL;
@@ -1032,7 +1079,7 @@ int manager_rtnl_process_nexthop(sd_netlink *rtnl, sd_netlink_message *message,
if (nexthop) {
nexthop_enter_removed(nexthop);
log_nexthop_debug(nexthop, "Forgetting removed", m);
(void) nexthop_remove_dependents(nexthop, m);
nexthop_forget_dependents(nexthop, m);
nexthop_detach(nexthop);
} else
log_nexthop_debug(&(const NextHop) { .id = id }, "Kernel removed unknown", m);

View File

@@ -60,6 +60,7 @@ static inline int link_drop_unmanaged_nexthops(Link *link) {
static inline int link_drop_static_nexthops(Link *link) {
return link_drop_nexthops(link, /* only_static = */ true);
}
void link_forget_nexthops(Link *link);
int link_request_static_nexthops(Link *link, bool only_ipv4);

View File

@@ -1,5 +1,6 @@
/* SPDX-License-Identifier: LGPL-2.1-or-later */
#include <linux/if.h>
#include <linux/ipv6_route.h>
#include <linux/nexthop.h>
@@ -47,7 +48,7 @@ static Route* route_detach_impl(Route *route) {
return NULL;
}
static void route_detach(Route *route) {
void route_detach(Route *route) {
route_unref(route_detach_impl(route));
}
@@ -1138,6 +1139,8 @@ static int process_route_one(
}
}
route_attach_to_nexthop(route);
route_enter_configured(route);
log_route_debug(route, is_new ? "Received new" : "Received remembered", manager);
@@ -1538,6 +1541,37 @@ int link_drop_routes(Link *link, bool only_static) {
return r;
}
void link_forget_routes(Link *link) {
assert(link);
assert(link->ifindex > 0);
assert(!FLAGS_SET(link->flags, IFF_UP));
/* When an interface went down, IPv4 non-local routes bound to the interface are silently removed by
* the kernel, without any notifications. Let's forget them in that case. Otherwise, when the link
* goes up later, the configuration order of routes may be confused by the nonexistent routes.
* See issue #35047. */
Route *route;
SET_FOREACH(route, link->manager->routes) {
// TODO: handle multipath routes
if (route->nexthop.ifindex != link->ifindex)
continue;
if (route->family != AF_INET)
continue;
// TODO: check RTN_NAT and RTN_XRESOLVE
if (!IN_SET(route->type, RTN_UNICAST, RTN_BROADCAST, RTN_ANYCAST, RTN_MULTICAST))
continue;
Request *req;
if (route_get_request(link->manager, route, &req) >= 0)
route_enter_removed(req->userdata);
route_enter_removed(route);
log_route_debug(route, "Forgetting silently removed", link->manager);
route_detach(route);
}
}
int network_add_ipv4ll_route(Network *network) {
_cleanup_(route_unref_or_set_invalidp) Route *route = NULL;
unsigned section_line;

View File

@@ -89,6 +89,8 @@ Route* route_ref(Route *route);
Route* route_unref(Route *route);
DEFINE_SECTION_CLEANUP_FUNCTIONS(Route, route_unref);
void route_detach(Route *route);
int route_new(Route **ret);
int route_new_static(Network *network, const char *filename, unsigned section_line, Route **ret);
int route_dup(const Route *src, const RouteNextHop *nh, Route **ret);
@@ -109,6 +111,7 @@ static inline int link_drop_static_routes(Link *link) {
static inline int link_drop_unmanaged_routes(Link *link) {
return link_drop_routes(link, false);
}
void link_forget_routes(Link *link);
int link_request_route(
Link *link,

View File

@@ -0,0 +1,10 @@
# SPDX-License-Identifier: LGPL-2.1-or-later
[Match]
Name=dummy98
[Network]
IPv6AcceptRA=no
[Route]
Destination=198.51.100.0/24
Gateway=192.0.2.2

View File

@@ -0,0 +1,4 @@
# SPDX-License-Identifier: LGPL-2.1-or-later
[Address]
Address=192.0.2.1/32
Peer=192.0.2.2/32

View File

@@ -0,0 +1,6 @@
# SPDX-License-Identifier: LGPL-2.1-or-later
[Address]
Address=192.0.2.1/32
[Route]
Destination=192.0.2.2/32

View File

@@ -4050,6 +4050,46 @@ class NetworkdNetworkTests(unittest.TestCase, Utilities):
with self.subTest(manage_foreign_routes=manage_foreign_routes):
self._test_route_static(manage_foreign_routes)
def test_route_static_issue_35047(self):
copy_network_unit(
'25-route-static-issue-35047.network',
'25-route-static-issue-35047.network.d/step1.conf',
'12-dummy.netdev',
copy_dropins=False)
start_networkd()
self.wait_online('dummy98:routable')
print('### ip -4 route show table all dev dummy98')
output = check_output('ip -4 route show table all dev dummy98')
print(output)
self.assertIn('192.0.2.2 proto kernel scope link src 192.0.2.1', output)
self.assertIn('local 192.0.2.1 table local proto kernel scope host src 192.0.2.1', output)
self.assertIn('198.51.100.0/24 via 192.0.2.2 proto static', output)
check_output('ip link set dev dummy98 down')
self.wait_route_dropped('dummy98', '192.0.2.2 proto kernel scope link src 192.0.2.1', ipv='-4', table='all', timeout_sec=10)
self.wait_route_dropped('dummy98', 'local 192.0.2.1 table local proto kernel scope host src 192.0.2.1', ipv='-4', table='all', timeout_sec=10)
self.wait_route_dropped('dummy98', '198.51.100.0/24 via 192.0.2.2 proto static', ipv='-4', table='all', timeout_sec=10)
print('### ip -4 route show table all dev dummy98')
output = check_output('ip -4 route show table all dev dummy98')
print(output)
self.assertNotIn('192.0.2.2', output)
self.assertNotIn('192.0.2.1', output)
self.assertNotIn('198.51.100.0/24', output)
remove_network_unit('25-route-static-issue-35047.network.d/step1.conf')
copy_network_unit('25-route-static-issue-35047.network.d/step2.conf')
networkctl_reload()
self.wait_online('dummy98:routable')
print('### ip -4 route show table all dev dummy98')
output = check_output('ip -4 route show table all dev dummy98')
print(output)
self.assertIn('192.0.2.2 proto static scope link', output)
self.assertIn('local 192.0.2.1 table local proto kernel scope host src 192.0.2.1', output)
self.assertIn('198.51.100.0/24 via 192.0.2.2 proto static', output)
@expectedFailureIfRTA_VIAIsNotSupported()
def test_route_via_ipv6(self):
copy_network_unit('25-route-via-ipv6.network', '12-dummy.netdev')
@@ -4798,11 +4838,18 @@ class NetworkdNetworkTests(unittest.TestCase, Utilities):
# Remove nexthop with ID 20
check_output('ip nexthop del id 20')
# Check the nexthop ID 20 is dropped from the group nexthop.
output = check_output('ip -0 nexthop list id 21')
print(output)
self.assertRegex(output, r'id 21 group 1,3')
# Remove nexthop with ID 21
check_output('ip nexthop del id 21')
copy_network_unit('11-dummy.netdev', '25-nexthop-test1.network')
networkctl_reload()
# 25-nexthop-test1.network requests a route with nexthop ID 21,
# which is silently removed by the kernel when nexthop with ID 20 is removed in the above,
# 25-nexthop-test1.network requests a route with nexthop ID 21, which is removed in the above,
# hence test1 should be stuck in the configuring state.
self.wait_operstate('test1', operstate='routable', setup_state='configuring')
@@ -4811,7 +4858,7 @@ class NetworkdNetworkTests(unittest.TestCase, Utilities):
output = networkctl_status('test1')
self.assertIn('State: routable (configuring)', output)
# Check if the route which needs nexthop 20 and 21 are forgotten.
# Check if the route which needs nexthop 21 are forgotten.
output = networkctl_json()
check_json(output)
self.assertNotIn('"Destination":[10.10.10.14]', output)