John Ogness | b6cf8b3 | 2020-07-09 15:29:42 +0206 | [diff] [blame] | 1 | /* SPDX-License-Identifier: GPL-2.0 */ |
| 2 | |
| 3 | #ifndef _KERNEL_PRINTK_RINGBUFFER_H |
| 4 | #define _KERNEL_PRINTK_RINGBUFFER_H |
| 5 | |
| 6 | #include <linux/atomic.h> |
| 7 | |
| 8 | /* |
| 9 | * Meta information about each stored message. |
| 10 | * |
| 11 | * All fields are set and used by the printk code except for |
| 12 | * @seq, @text_len, @dict_len, which are set and/or modified |
| 13 | * by the ringbuffer code. |
| 14 | */ |
| 15 | struct printk_info { |
| 16 | u64 seq; /* sequence number */ |
| 17 | u64 ts_nsec; /* timestamp in nanoseconds */ |
| 18 | u16 text_len; /* length of text message */ |
| 19 | u16 dict_len; /* length of dictionary message */ |
| 20 | u8 facility; /* syslog facility */ |
| 21 | u8 flags:5; /* internal record flags */ |
| 22 | u8 level:3; /* syslog level */ |
| 23 | u32 caller_id; /* thread id or processor id */ |
| 24 | }; |
| 25 | |
/**
 * struct printk_record - The buffers used by writers and readers.
 *
 * @info:          Meta information of the record.
 * @text_buf:      Buffer for the text data.
 * @dict_buf:      Buffer for the dictionary data.
 * @text_buf_size: Size of @text_buf.
 * @dict_buf_size: Size of @dict_buf.
 *
 * Writers:
 * A writer uses prb_rec_init_wr() to set @text_buf_size and @dict_buf_size
 * and then calls prb_reserve(). When prb_reserve() succeeds, it points
 * @info, @text_buf and @dict_buf to the buffers reserved for that writer.
 *
 * Readers:
 * A reader uses prb_rec_init_rd() to set all fields and then calls
 * prb_read_valid(). Here the reader is the one providing the @info,
 * @text_buf and @dict_buf buffers. When the read succeeds, the struct
 * pointed to by @info is filled and the char arrays pointed to by
 * @text_buf and @dict_buf are filled with the text and dict data.
 */
struct printk_record {
	struct printk_info	*info;
	char			*text_buf;
	char			*dict_buf;
	unsigned int		text_buf_size;
	unsigned int		dict_buf_size;
};
| 48 | |
/**
 * struct prb_data_blk_lpos - Logical position and span of a data block.
 *
 * @begin: Logical position where the data block begins.
 * @next:  Logical position following the data block.
 */
struct prb_data_blk_lpos {
	unsigned long	begin;
	unsigned long	next;
};
| 54 | |
| 55 | /* |
| 56 | * A descriptor: the complete meta-data for a record. |
| 57 | * |
| 58 | * @state_var: A bitwise combination of descriptor ID and descriptor state. |
| 59 | */ |
| 60 | struct prb_desc { |
| 61 | struct printk_info info; |
| 62 | atomic_long_t state_var; |
| 63 | struct prb_data_blk_lpos text_blk_lpos; |
| 64 | struct prb_data_blk_lpos dict_blk_lpos; |
| 65 | }; |
| 66 | |
| 67 | /* A ringbuffer of "ID + data" elements. */ |
| 68 | struct prb_data_ring { |
| 69 | unsigned int size_bits; |
| 70 | char *data; |
| 71 | atomic_long_t head_lpos; |
| 72 | atomic_long_t tail_lpos; |
| 73 | }; |
| 74 | |
| 75 | /* A ringbuffer of "struct prb_desc" elements. */ |
| 76 | struct prb_desc_ring { |
| 77 | unsigned int count_bits; |
| 78 | struct prb_desc *descs; |
| 79 | atomic_long_t head_id; |
| 80 | atomic_long_t tail_id; |
| 81 | }; |
| 82 | |
| 83 | /* |
| 84 | * The high level structure representing the printk ringbuffer. |
| 85 | * |
| 86 | * @fail: Count of failed prb_reserve() calls where not even a data-less |
| 87 | * record was created. |
| 88 | */ |
| 89 | struct printk_ringbuffer { |
| 90 | struct prb_desc_ring desc_ring; |
| 91 | struct prb_data_ring text_data_ring; |
| 92 | struct prb_data_ring dict_data_ring; |
| 93 | atomic_long_t fail; |
| 94 | }; |
| 95 | |
/**
 * struct prb_reserved_entry - Reserve/commit handle used by writers.
 *
 * @rb:         Ringbuffer where the entry is reserved.
 * @irqflags:   Saved irq flags to restore on entry commit.
 * @id:         ID of the reserved descriptor.
 * @text_space: Total occupied buffer space in the text data ring,
 *              including ID, alignment padding, and wrapping data blocks.
 *
 * Writers must treat this structure as an opaque handle. Only the
 * ringbuffer implementation is to use its contents.
 */
struct prb_reserved_entry {
	struct printk_ringbuffer	*rb;
	unsigned long			irqflags;
	unsigned long			id;
	unsigned int			text_space;
};
| 114 | |
/*
 * The possible responses of a descriptor state-query.
 *
 * Except for the pseudo state desc_miss, each value fits in the two
 * state bits of a descriptor state variable.
 */
enum desc_state {
	/* ID mismatch (pseudo state) */
	desc_miss	= -1,
	/* reserved, in use by writer */
	desc_reserved	= 0x0,
	/* committed by writer, could get reopened */
	desc_committed	= 0x1,
	/* committed, no further modification allowed */
	desc_finalized	= 0x2,
	/* free, not yet used by any writer */
	desc_reusable	= 0x3,
};
| 123 | |
/*
 * Helpers for data ring sizes, descriptor counts and descriptor state
 * variables.
 *
 * A state variable is an unsigned long whose 2 most significant bits hold
 * the descriptor state and whose remaining bits hold the descriptor ID.
 *
 * All macro arguments are parenthesized in the expansions so that
 * expression arguments (e.g. "a | b" or "c ? x : y") are evaluated as a
 * whole.
 */
#define _DATA_SIZE(sz_bits)	(1UL << (sz_bits))
#define _DESCS_COUNT(ct_bits)	(1U << (ct_bits))
#define DESC_SV_BITS		(sizeof(unsigned long) * 8)
#define DESC_FLAGS_SHIFT	(DESC_SV_BITS - 2)
#define DESC_FLAGS_MASK		(3UL << DESC_FLAGS_SHIFT)
#define DESC_STATE(sv)		(3UL & ((sv) >> DESC_FLAGS_SHIFT))
#define DESC_SV(id, state)	(((unsigned long)(state) << DESC_FLAGS_SHIFT) | (id))
#define DESC_ID_MASK		(~DESC_FLAGS_MASK)
#define DESC_ID(sv)		((sv) & DESC_ID_MASK)
#define FAILED_LPOS		0x1
#define NO_LPOS			0x3

/*
 * Initializer for a struct prb_data_blk_lpos that has no associated data
 * block: both @begin and @next are set to FAILED_LPOS.
 */
#define FAILED_BLK_LPOS		\
{				\
	.begin	= FAILED_LPOS,	\
	.next	= FAILED_LPOS,	\
}
| 141 | |
| 142 | /* |
| 143 | * Descriptor Bootstrap |
| 144 | * |
| 145 | * The descriptor array is minimally initialized to allow immediate usage |
| 146 | * by readers and writers. The requirements that the descriptor array |
| 147 | * initialization must satisfy: |
| 148 | * |
| 149 | * Req1 |
| 150 | * The tail must point to an existing (committed or reusable) descriptor. |
| 151 | * This is required by the implementation of prb_first_seq(). |
| 152 | * |
| 153 | * Req2 |
| 154 | * Readers must see that the ringbuffer is initially empty. |
| 155 | * |
| 156 | * Req3 |
| 157 | * The first record reserved by a writer is assigned sequence number 0. |
| 158 | * |
| 159 | * To satisfy Req1, the tail initially points to a descriptor that is |
| 160 | * minimally initialized (having no data block, i.e. data-less with the |
John Ogness | d397820 | 2020-07-21 15:31:28 +0206 | [diff] [blame] | 161 | * data block's lpos @begin and @next values set to FAILED_LPOS). |
John Ogness | b6cf8b3 | 2020-07-09 15:29:42 +0206 | [diff] [blame] | 162 | * |
| 163 | * To satisfy Req2, the initial tail descriptor is initialized to the |
| 164 | * reusable state. Readers recognize reusable descriptors as existing |
| 165 | * records, but skip over them. |
| 166 | * |
| 167 | * To satisfy Req3, the last descriptor in the array is used as the initial |
| 168 | * head (and tail) descriptor. This allows the first record reserved by a |
| 169 | * writer (head + 1) to be the first descriptor in the array. (Only the first |
| 170 | * descriptor in the array could have a valid sequence number of 0.) |
| 171 | * |
| 172 | * The first time a descriptor is reserved, it is assigned a sequence number |
| 173 | * with the value of the array index. A "first time reserved" descriptor can |
| 174 | * be recognized because it has a sequence number of 0 but does not have an |
| 175 | * index of 0. (Only the first descriptor in the array could have a valid |
| 176 | * sequence number of 0.) After the first reservation, all future reservations |
| 177 | * (recycling) simply involve incrementing the sequence number by the array |
| 178 | * count. |
| 179 | * |
| 180 | * Hack #1 |
| 181 | * Only the first descriptor in the array is allowed to have the sequence |
| 182 | * number 0. In this case it is not possible to recognize if it is being |
| 183 | * reserved the first time (set to index value) or has been reserved |
| 184 | * previously (increment by the array count). This is handled by _always_ |
| 185 | * incrementing the sequence number by the array count when reserving the |
| 186 | * first descriptor in the array. In order to satisfy Req3, the sequence |
| 187 | * number of the first descriptor in the array is initialized to minus |
| 188 | * the array count. Then, upon the first reservation, it is incremented |
| 189 | * to 0, thus satisfying Req3. |
| 190 | * |
| 191 | * Hack #2 |
| 192 | * prb_first_seq() can be called at any time by readers to retrieve the |
| 193 | * sequence number of the tail descriptor. However, due to Req2 and Req3, |
| 194 | * initially there are no records to report the sequence number of |
| 195 | * (sequence numbers are u64 and there is nothing less than 0). To handle |
| 196 | * this, the sequence number of the initial tail descriptor is initialized |
| 197 | * to 0. Technically this is incorrect, because there is no record with |
| 198 | * sequence number 0 (yet) and the tail descriptor is not the first |
| 199 | * descriptor in the array. But it allows prb_read_valid() to correctly |
| 200 | * report the existence of a record for _any_ given sequence number at all |
| 201 | * times. Bootstrapping is complete when the tail is pushed the first |
| 202 | * time, thus finally pointing to the first descriptor reserved by a |
| 203 | * writer, which has the assigned sequence number 0. |
| 204 | */ |
| 205 | |
/*
 * Initiating Logical Value Overflows
 *
 * Logical position (lpos) values and ID values can both be mapped to array
 * indexes, and both may overflow during the lifetime of the system. So
 * that printk_ringbuffer is known to handle such overflows, the initial
 * values are chosen so that they map to the correct initial array indexes
 * but lead to overflows soon.
 *
 * BLK0_LPOS
 *   The initial @head_lpos and @tail_lpos for data rings. It is at index
 *   0 and the lpos value is such that it will overflow on the first wrap.
 *
 * DESC0_ID
 *   The initial @head_id and @tail_id for the desc ring. It is at the last
 *   index of the descriptor array (see Req3 above) and the ID value is such
 *   that it will overflow on the second wrap.
 *
 * DESC0_SV
 *   The state variable for the descriptor at DESC0_ID: that ID combined
 *   with the reusable state.
 *
 * NOTE(review): _DESCS_COUNT() is an unsigned int expression, so the
 * negation in DESC0_ID() is evaluated as unsigned int before DESC_ID()
 * widens it to unsigned long. Where unsigned long is wider than unsigned
 * int, the resulting ID is near 2^32 rather than near the largest ID --
 * confirm this is the intended "overflow on the second wrap" behavior.
 */
#define BLK0_LPOS(sz_bits)	(-_DATA_SIZE(sz_bits))
#define DESC0_ID(ct_bits)	DESC_ID(-(_DESCS_COUNT(ct_bits) + 1))
#define DESC0_SV(ct_bits)	DESC_SV(DESC0_ID(ct_bits), desc_reusable)
John Ogness | b6cf8b3 | 2020-07-09 15:29:42 +0206 | [diff] [blame] | 227 | |
/*
 * Define a ringbuffer with an external text data buffer. The same as
 * DEFINE_PRINTKRB() but requires specifying an external buffer for the
 * text data.
 *
 * Note: The specified external buffer must be of the size:
 *   2 ^ (descbits + avgtextbits)
 *
 * The dictionary data buffer is defined by this macro itself, with the size:
 *   2 ^ (descbits + avgdictbits)
 * The dictionary data ring is therefore described using @avgdictbits so
 * that the ring size always matches the size of its buffer.
 *
 * All atomic_long_t members are initialized with ATOMIC_LONG_INIT().
 */
#define _DEFINE_PRINTKRB(name, descbits, avgtextbits, avgdictbits, text_buf) \
static char _##name##_dict[1U << ((avgdictbits) + (descbits))] \
	__aligned(__alignof__(unsigned long)); \
static struct prb_desc _##name##_descs[_DESCS_COUNT(descbits)] = { \
	/* this will be the first record reserved by a writer */ \
	[0] = { \
		.info = { \
			/* will be incremented to 0 on the first reservation */ \
			.seq = -(u64)_DESCS_COUNT(descbits), \
		}, \
	}, \
	/* the initial head and tail */ \
	[_DESCS_COUNT(descbits) - 1] = { \
		.info = { \
			/* reports the first seq value during the bootstrap phase */ \
			.seq = 0, \
		}, \
		/* reusable */ \
		.state_var = ATOMIC_LONG_INIT(DESC0_SV(descbits)), \
		/* no associated data block */ \
		.text_blk_lpos = FAILED_BLK_LPOS, \
		.dict_blk_lpos = FAILED_BLK_LPOS, \
	}, \
}; \
static struct printk_ringbuffer name = { \
	.desc_ring = { \
		.count_bits = descbits, \
		.descs = &_##name##_descs[0], \
		.head_id = ATOMIC_LONG_INIT(DESC0_ID(descbits)), \
		.tail_id = ATOMIC_LONG_INIT(DESC0_ID(descbits)), \
	}, \
	.text_data_ring = { \
		.size_bits = (avgtextbits) + (descbits), \
		.data = text_buf, \
		.head_lpos = ATOMIC_LONG_INIT(BLK0_LPOS((avgtextbits) + (descbits))), \
		.tail_lpos = ATOMIC_LONG_INIT(BLK0_LPOS((avgtextbits) + (descbits))), \
	}, \
	.dict_data_ring = { \
		/* must match the size of the _##name##_dict buffer */ \
		.size_bits = (avgdictbits) + (descbits), \
		.data = &_##name##_dict[0], \
		.head_lpos = ATOMIC_LONG_INIT(BLK0_LPOS((avgdictbits) + (descbits))), \
		.tail_lpos = ATOMIC_LONG_INIT(BLK0_LPOS((avgdictbits) + (descbits))), \
	}, \
	.fail = ATOMIC_LONG_INIT(0), \
}
| 281 | |
/**
 * DEFINE_PRINTKRB() - Define a ringbuffer.
 *
 * @name:        The name of the ringbuffer variable.
 * @descbits:    The number of descriptors as a power-of-2 value.
 * @avgtextbits: The average text data size per record as a power-of-2 value.
 * @avgdictbits: The average dictionary data size per record as a
 *               power-of-2 value.
 *
 * Defines a ringbuffer together with all of its internal structures so
 * that it can be used immediately. The text data buffer is defined here
 * with a size of 2 ^ (@avgtextbits + @descbits) and handed over to
 * _DEFINE_PRINTKRB(), which is the variant to use when the text data
 * buffer is to be specified externally.
 */
#define DEFINE_PRINTKRB(name, descbits, avgtextbits, avgdictbits) \
static char _##name##_text[1U << ((avgtextbits) + (descbits))] \
	__aligned(__alignof__(unsigned long)); \
_DEFINE_PRINTKRB(name, descbits, avgtextbits, avgdictbits, &_##name##_text[0])
| 299 | |
| 300 | /* Writer Interface */ |
| 301 | |
| 302 | /** |
| 303 | * prb_rec_init_wd() - Initialize a buffer for writing records. |
| 304 | * |
| 305 | * @r: The record to initialize. |
| 306 | * @text_buf_size: The needed text buffer size. |
| 307 | * @dict_buf_size: The needed dictionary buffer size. |
| 308 | * |
| 309 | * Initialize all the fields that a writer is interested in. If |
| 310 | * @dict_buf_size is 0, a dictionary buffer will not be reserved. |
| 311 | * @text_buf_size must be greater than 0. |
| 312 | * |
| 313 | * Note that although @dict_buf_size may be initialized to non-zero, |
| 314 | * its value must be rechecked after a successful call to prb_reserve() |
| 315 | * to verify a dictionary buffer was actually reserved. Dictionary buffer |
| 316 | * reservation is allowed to fail. |
| 317 | */ |
| 318 | static inline void prb_rec_init_wr(struct printk_record *r, |
| 319 | unsigned int text_buf_size, |
| 320 | unsigned int dict_buf_size) |
| 321 | { |
| 322 | r->info = NULL; |
| 323 | r->text_buf = NULL; |
| 324 | r->dict_buf = NULL; |
| 325 | r->text_buf_size = text_buf_size; |
| 326 | r->dict_buf_size = dict_buf_size; |
| 327 | } |
| 328 | |
| 329 | bool prb_reserve(struct prb_reserved_entry *e, struct printk_ringbuffer *rb, |
| 330 | struct printk_record *r); |
John Ogness | 4cfc725 | 2020-09-14 14:39:53 +0206 | [diff] [blame^] | 331 | bool prb_reserve_in_last(struct prb_reserved_entry *e, struct printk_ringbuffer *rb, |
| 332 | struct printk_record *r, u32 caller_id); |
John Ogness | b6cf8b3 | 2020-07-09 15:29:42 +0206 | [diff] [blame] | 333 | void prb_commit(struct prb_reserved_entry *e); |
John Ogness | 4cfc725 | 2020-09-14 14:39:53 +0206 | [diff] [blame^] | 334 | void prb_final_commit(struct prb_reserved_entry *e); |
John Ogness | b6cf8b3 | 2020-07-09 15:29:42 +0206 | [diff] [blame] | 335 | |
| 336 | void prb_init(struct printk_ringbuffer *rb, |
| 337 | char *text_buf, unsigned int text_buf_size, |
| 338 | char *dict_buf, unsigned int dict_buf_size, |
| 339 | struct prb_desc *descs, unsigned int descs_count_bits); |
| 340 | unsigned int prb_record_text_space(struct prb_reserved_entry *e); |
| 341 | |
| 342 | /* Reader Interface */ |
| 343 | |
| 344 | /** |
| 345 | * prb_rec_init_rd() - Initialize a buffer for reading records. |
| 346 | * |
| 347 | * @r: The record to initialize. |
| 348 | * @info: A buffer to store record meta-data. |
| 349 | * @text_buf: A buffer to store text data. |
| 350 | * @text_buf_size: The size of @text_buf. |
| 351 | * @dict_buf: A buffer to store dictionary data. |
| 352 | * @dict_buf_size: The size of @dict_buf. |
| 353 | * |
| 354 | * Initialize all the fields that a reader is interested in. All arguments |
| 355 | * (except @r) are optional. Only record data for arguments that are |
| 356 | * non-NULL or non-zero will be read. |
| 357 | */ |
| 358 | static inline void prb_rec_init_rd(struct printk_record *r, |
| 359 | struct printk_info *info, |
| 360 | char *text_buf, unsigned int text_buf_size, |
| 361 | char *dict_buf, unsigned int dict_buf_size) |
| 362 | { |
| 363 | r->info = info; |
| 364 | r->text_buf = text_buf; |
| 365 | r->dict_buf = dict_buf; |
| 366 | r->text_buf_size = text_buf_size; |
| 367 | r->dict_buf_size = dict_buf_size; |
| 368 | } |
| 369 | |
/**
 * prb_for_each_record() - Iterate over the records of a ringbuffer.
 *
 * @from: The sequence number to begin with.
 * @rb:   The ringbuffer to iterate over.
 * @s:    A u64 to store the sequence number on each iteration.
 * @r:    A printk_record to store the record on each iteration.
 *
 * A convenience macro for walking a ringbuffer: the loop runs for as long
 * as prb_read_valid() succeeds and continues after the sequence number of
 * the record that was read. Note that @s may not be the sequence number
 * of the record on each iteration. For the sequence number,
 * @r->info->seq should be checked.
 *
 * Context: Any context.
 */
#define prb_for_each_record(from, rb, s, r) \
	for ((s) = (from); \
	     prb_read_valid(rb, s, r); \
	     (s) = (r)->info->seq + 1)
| 386 | |
/**
 * prb_for_each_info() - Iterate over the meta data of a ringbuffer.
 *
 * @from: The sequence number to begin with.
 * @rb:   The ringbuffer to iterate over.
 * @s:    A u64 to store the sequence number on each iteration.
 * @i:    A printk_info to store the record meta data on each iteration.
 * @lc:   An unsigned int to store the text line count of each record.
 *
 * This is a macro for conveniently iterating over a ringbuffer. The loop
 * runs for as long as prb_read_valid_info() succeeds.
 * Note that @s may not be the sequence number of the record on each
 * iteration. For the sequence number, @i->seq should be checked.
 *
 * Context: Any context.
 */
#define prb_for_each_info(from, rb, s, i, lc) \
	for ((s) = (from); \
	     prb_read_valid_info(rb, s, i, lc); \
	     (s) = (i)->seq + 1)
| 404 | |
| 405 | bool prb_read_valid(struct printk_ringbuffer *rb, u64 seq, |
| 406 | struct printk_record *r); |
| 407 | bool prb_read_valid_info(struct printk_ringbuffer *rb, u64 seq, |
| 408 | struct printk_info *info, unsigned int *line_count); |
| 409 | |
| 410 | u64 prb_first_valid_seq(struct printk_ringbuffer *rb); |
| 411 | u64 prb_next_seq(struct printk_ringbuffer *rb); |
| 412 | |
| 413 | #endif /* _KERNEL_PRINTK_RINGBUFFER_H */ |