2020年4月3日 星期五

struct net_device

網路通訊需要透過網路介面卡存取通訊媒介。struct net_device 為各種網卡 (及其驅動程式) 提供一致的存取介面,是 character 和 block 之外 Linux 核心的第三種 device,但不會出現在 /dev 目錄。網路介面可以是實體的或虛擬的 (如 loopback),也可能綁定特定協定 (如 PPP)。虛擬網路介面 (例如 bonding) 通常透過私有資料 priv 實作。

重要成員
  • mtu:例如 Ethernet 是 1500,可透過「ifconfig <interface> mtu <mtu>」更改
  • flags:可透過 ifconfig 看到 UP BROADCAST RUNNING MULTICAST NOARP
  • dev_addr:MAC 位址 (在下方列出的版本中宣告為指標 unsigned char *dev_addr)
  • promiscuity:設為 promiscuous 模式的次數
  • IN_DEV 開頭巨集 (IN_DEV_FORWARD(), IN_DEV_RX_REDIRECTS())
  • hard_start_xmit():傳送封包的方式 (舊版核心的成員;下方列出的版本已無此成員,傳送函數改放在 netdev_ops 所指的 struct net_device_ops 中,即 ndo_start_xmit())
  • ip_ptr
  • ip_ptr 所指的 struct in_device,其成員 cnf (struct ipv4_devconf) 包括 forwarding, accept_redirects, send_redirects 等設定,對應到 /proc/sys/net/ipv4/conf/<all、default 或介面名稱>/ 目錄下的同名檔案 (例如 /proc/sys/net/ipv4/conf/all/forwarding)。
/*
 * struct net_device - descriptor for a single network interface.
 *
 * In declaration order the members cover: the interface name, I/O
 * resource fields, list linkage, feature masks, statistics counters,
 * operation tables, flags, MTU and header sizing, hardware address
 * lists, per-protocol pointers, receive-path fields, transmit-path
 * fields, registration state, and the embedded struct device.
 *
 * Many members exist only when the corresponding CONFIG_* option is
 * enabled, so the size and layout of this struct depend on the kernel
 * configuration.
 *
 * NOTE(review): the section comments below group members by the cache
 * lines used on the receive and transmit paths, and _tx is explicitly
 * cache-line aligned; member order therefore looks deliberate. Confirm
 * before reordering anything.
 */
struct net_device {
 char   name[IFNAMSIZ];
 struct netdev_name_node *name_node;
 struct dev_ifalias __rcu *ifalias;
 /*
  * I/O specific fields
  * FIXME: Merge these and struct ifmap into one
  */
 unsigned long  mem_end;
 unsigned long  mem_start;
 unsigned long  base_addr;
 int   irq;

 /*
  * Some hardware also needs these fields (state,dev_list,
  * napi_list,unreg_list,close_list) but they are not
  * part of the usual set specified in Space.c.
  */

 unsigned long  state;

 struct list_head dev_list;
 struct list_head napi_list;
 struct list_head unreg_list;
 struct list_head close_list;
 struct list_head ptype_all;
 struct list_head ptype_specific;

 /* Two list heads, one per direction: "upper" and "lower". */
 struct {
  struct list_head upper;
  struct list_head lower;
 } adj_list;

 /* Feature masks, all of type netdev_features_t. */
 netdev_features_t features;
 netdev_features_t hw_features;
 netdev_features_t wanted_features;
 netdev_features_t vlan_features;
 netdev_features_t hw_enc_features;
 netdev_features_t mpls_features;
 netdev_features_t gso_partial_features;

 int   ifindex;
 int   group;

 struct net_device_stats stats;

 /* Drop / no-handler counters kept as atomic longs. */
 atomic_long_t  rx_dropped;
 atomic_long_t  tx_dropped;
 atomic_long_t  rx_nohandler;

 /* Stats to monitor link on/off, flapping */
 atomic_t  carrier_up_count;
 atomic_t  carrier_down_count;

 /*
  * Operation tables. All are pointers to const structures; several
  * are compiled in only when the matching CONFIG_* option is set.
  */
#ifdef CONFIG_WIRELESS_EXT
 const struct iw_handler_def *wireless_handlers;
 struct iw_public_data *wireless_data;
#endif
 const struct net_device_ops *netdev_ops;
 const struct ethtool_ops *ethtool_ops;
#ifdef CONFIG_NET_L3_MASTER_DEV
 const struct l3mdev_ops *l3mdev_ops;
#endif
#if IS_ENABLED(CONFIG_IPV6)
 const struct ndisc_ops *ndisc_ops;
#endif

#ifdef CONFIG_XFRM_OFFLOAD
 const struct xfrmdev_ops *xfrmdev_ops;
#endif

#if IS_ENABLED(CONFIG_TLS_DEVICE)
 const struct tlsdev_ops *tlsdev_ops;
#endif

 const struct header_ops *header_ops;

 /*
  * NOTE(review): flags is presumably the set of interface flags that
  * ifconfig prints (UP, BROADCAST, RUNNING, MULTICAST, NOARP) - confirm.
  */
 unsigned int  flags;
 unsigned int  priv_flags;

 unsigned short  gflags;
 unsigned short  padded;

 unsigned char  operstate;
 unsigned char  link_mode;

 unsigned char  if_port;
 unsigned char  dma;

 /* Note : dev->mtu is often read without holding a lock.
  * Writers usually hold RTNL.
  * It is recommended to use READ_ONCE() to annotate the reads,
  * and to use WRITE_ONCE() to annotate the writes.
  */
 unsigned int  mtu;
 unsigned int  min_mtu;
 unsigned int  max_mtu;
 unsigned short  type;
 unsigned short  hard_header_len;
 unsigned char  min_header_len;

 unsigned short  needed_headroom;
 unsigned short  needed_tailroom;

 /* Interface address info. */
 unsigned char  perm_addr[MAX_ADDR_LEN];
 unsigned char  addr_assign_type;
 unsigned char  addr_len;
 unsigned char  upper_level;
 unsigned char  lower_level;
 unsigned short  neigh_priv_len;
 unsigned short          dev_id;
 unsigned short          dev_port;
 spinlock_t  addr_list_lock;
 unsigned char  name_assign_type;
 bool   uc_promisc;
 /* Hardware address lists: uc, mc and dev_addrs. */
 struct netdev_hw_addr_list uc;
 struct netdev_hw_addr_list mc;
 struct netdev_hw_addr_list dev_addrs;

#ifdef CONFIG_SYSFS
 struct kset  *queues_kset;
#endif
 /*
  * NOTE(review): promiscuity is presumably a count of how many times
  * promiscuous mode has been requested rather than a boolean - confirm.
  */
 unsigned int  promiscuity;
 unsigned int  allmulti;


 /* Protocol-specific pointers */

#if IS_ENABLED(CONFIG_VLAN_8021Q)
 struct vlan_info __rcu *vlan_info;
#endif
#if IS_ENABLED(CONFIG_NET_DSA)
 struct dsa_port  *dsa_ptr;
#endif
#if IS_ENABLED(CONFIG_TIPC)
 struct tipc_bearer __rcu *tipc_ptr;
#endif
#if IS_ENABLED(CONFIG_IRDA) || IS_ENABLED(CONFIG_ATALK)
 void    *atalk_ptr;
#endif
 /* Per-device struct in_device; always present (no CONFIG guard). */
 struct in_device __rcu *ip_ptr;
#if IS_ENABLED(CONFIG_DECNET)
 struct dn_dev __rcu     *dn_ptr;
#endif
 struct inet6_dev __rcu *ip6_ptr;
#if IS_ENABLED(CONFIG_AX25)
 void   *ax25_ptr;
#endif
 struct wireless_dev *ieee80211_ptr;
 struct wpan_dev  *ieee802154_ptr;
#if IS_ENABLED(CONFIG_MPLS_ROUTING)
 struct mpls_dev __rcu *mpls_ptr;
#endif

/*
 * Cache lines mostly used on receive path (including eth_type_trans())
 */
 /* Interface address info used in eth_type_trans() */
 unsigned char  *dev_addr;

 /* Receive queues: array pointer plus allocated and in-use counts. */
 struct netdev_rx_queue *_rx;
 unsigned int  num_rx_queues;
 unsigned int  real_num_rx_queues;

 struct bpf_prog __rcu *xdp_prog;
 unsigned long  gro_flush_timeout;
 rx_handler_func_t __rcu *rx_handler;
 void __rcu  *rx_handler_data;

#ifdef CONFIG_NET_CLS_ACT
 struct mini_Qdisc __rcu *miniq_ingress;
#endif
 struct netdev_queue __rcu *ingress_queue;
#ifdef CONFIG_NETFILTER_INGRESS
 struct nf_hook_entries __rcu *nf_hooks_ingress;
#endif

 unsigned char  broadcast[MAX_ADDR_LEN];
#ifdef CONFIG_RFS_ACCEL
 struct cpu_rmap  *rx_cpu_rmap;
#endif
 struct hlist_node index_hlist;

/*
 * Cache lines mostly used on transmit path
 */
 /* _tx starts a new cache line on SMP builds (see the alignment attribute). */
 struct netdev_queue *_tx ____cacheline_aligned_in_smp;
 unsigned int  num_tx_queues;
 unsigned int  real_num_tx_queues;
 struct Qdisc  *qdisc;
 unsigned int  tx_queue_len;
 spinlock_t  tx_global_lock;

 struct xdp_dev_bulk_queue __percpu *xdp_bulkq;

#ifdef CONFIG_XPS
 struct xps_dev_maps __rcu *xps_cpus_map;
 struct xps_dev_maps __rcu *xps_rxqs_map;
#endif
#ifdef CONFIG_NET_CLS_ACT
 struct mini_Qdisc __rcu *miniq_egress;
#endif

#ifdef CONFIG_NET_SCHED
 DECLARE_HASHTABLE (qdisc_hash, 4);
#endif
 /* These may be needed for future network-power-down code. */
 struct timer_list watchdog_timer;
 int   watchdog_timeo;

 struct list_head todo_list;
 int __percpu  *pcpu_refcnt;

 struct list_head link_watch_list;

 /* Registration lifecycle state, stored in an 8-bit bitfield. */
 enum { NETREG_UNINITIALIZED=0,
        NETREG_REGISTERED, /* completed register_netdevice */
        NETREG_UNREGISTERING, /* called unregister_netdevice */
        NETREG_UNREGISTERED, /* completed unregister todo */
        NETREG_RELEASED,  /* called free_netdev */
        NETREG_DUMMY,  /* dummy device for NAPI poll */
 } reg_state:8;

 bool dismantle;

 /* rtnl link initialisation state, stored in a 16-bit bitfield. */
 enum {
  RTNL_LINK_INITIALIZED,
  RTNL_LINK_INITIALIZING,
 } rtnl_link_state:16;

 bool needs_free_netdev;
 /* Optional callback taking the device itself as its only argument. */
 void (*priv_destructor)(struct net_device *dev);

#ifdef CONFIG_NETPOLL
 struct netpoll_info __rcu *npinfo;
#endif

 possible_net_t   nd_net;

 /* mid-layer private */
 /*
  * Anonymous union: the four pointers share the same storage, so only
  * one of them can be in use for a given device.
  */
 union {
  void     *ml_priv;
  struct pcpu_lstats __percpu  *lstats;
  struct pcpu_sw_netstats __percpu *tstats;
  struct pcpu_dstats __percpu  *dstats;
 };

#if IS_ENABLED(CONFIG_GARP)
 struct garp_port __rcu *garp_port;
#endif
#if IS_ENABLED(CONFIG_MRP)
 struct mrp_port __rcu *mrp_port;
#endif

 /* Embedded (not pointed-to) struct device, plus its sysfs groups. */
 struct device  dev;
 const struct attribute_group *sysfs_groups[4];
 const struct attribute_group *sysfs_rx_queue_group;

 const struct rtnl_link_ops *rtnl_link_ops;

 /* for setting kernel sock attribute on TCP connection setup */
 /*
  * The two macros below are defined inside the struct body, next to
  * the members they are named after.
  * NOTE(review): presumably they are the upper limits for gso_max_size
  * and gso_max_segs - confirm against the users of these fields.
  */
#define GSO_MAX_SIZE  65536
 unsigned int  gso_max_size;
#define GSO_MAX_SEGS  65535
 u16   gso_max_segs;

#ifdef CONFIG_DCB
 const struct dcbnl_rtnl_ops *dcbnl_ops;
#endif
 /* Traffic-class configuration: class count and two lookup tables. */
 s16   num_tc;
 struct netdev_tc_txq tc_to_txq[TC_MAX_QUEUE];
 u8   prio_tc_map[TC_BITMASK + 1];

#if IS_ENABLED(CONFIG_FCOE)
 unsigned int  fcoe_ddp_xid;
#endif
#if IS_ENABLED(CONFIG_CGROUP_NET_PRIO)
 struct netprio_map __rcu *priomap;
#endif
 struct phy_device *phydev;
 struct sfp_bus  *sfp_bus;
 /* Lock class keys, one per named lock. */
 struct lock_class_key qdisc_tx_busylock_key;
 struct lock_class_key qdisc_running_key;
 struct lock_class_key qdisc_xmit_lock_key;
 struct lock_class_key addr_list_lock_key;
 bool   proto_down;
 /* Single-bit flag. */
 unsigned  wol_enabled:1;

 struct list_head net_notifier_list;
};

eth_mangle_rx (OpenWrt 特有?)

許多網路介面是 PCI devices,會使用 PCI 通用的函數,如 pci_register_driver() 和 pci_enable_device();有些則是 USB devices。網路驅動程式的寫法可參考 Linux Device Drivers 第三版 (LDD3) 第 17 章 Network Drivers。

註:bonding:兩個以上網路介面使用相同 IP,作為 load balancing 及 high availability 使用。
參考來源:
  1. Linux Kernel Networking by Rami Rosen at Haifux, August 2007
延伸閱讀
http://www.haifux.org/lectures/187/netLec3.pdf

沒有留言:

張貼留言

SIP header Via

所有 SIP 訊息 都要有 Via,縮寫 v。一開始的 UAC 和後續途經的每個 proxy 都會疊加一個 Via 放傳送的位址,依序作為回應的路徑。 格式 sent-protocol sent-by [ ;branch= branch ][ ; 參數 ...] s...