|
13 | 13 | #include <linux/sched.h> |
14 | 14 | #include <linux/smp.h> |
15 | 15 | #include <linux/memblock.h> |
| 16 | +#include <linux/minmax.h> |
16 | 17 | #include <linux/mm.h> |
17 | 18 | #include <linux/hugetlb.h> |
18 | 19 | #include <linux/export.h> |
|
24 | 25 | #include <asm/hazards.h> |
25 | 26 | #include <asm/mmu_context.h> |
26 | 27 | #include <asm/tlb.h> |
| 28 | +#include <asm/tlbdebug.h> |
27 | 29 | #include <asm/tlbex.h> |
28 | 30 | #include <asm/tlbmisc.h> |
29 | 31 | #include <asm/setup.h> |
@@ -511,87 +513,259 @@ static int __init set_ntlb(char *str) |
511 | 513 | __setup("ntlb=", set_ntlb); |
512 | 514 |
|
513 | 515 |
|
514 | | -/* Comparison function for EntryHi VPN fields. */ |
515 | | -static int r4k_vpn_cmp(const void *a, const void *b) |
| 516 | +/* The start bit position of VPN2 and Mask in EntryHi/PageMask registers. */ |
| 517 | +#define VPN2_SHIFT 13 |
| 518 | + |
| 519 | +/* Read full EntryHi even with CONFIG_32BIT. */ |
| 520 | +static inline unsigned long long read_c0_entryhi_native(void) |
| 521 | +{ |
| 522 | + return cpu_has_64bits ? read_c0_entryhi_64() : read_c0_entryhi(); |
| 523 | +} |
| 524 | + |
| 525 | +/* Write full EntryHi even with CONFIG_32BIT. */ |
| 526 | +static inline void write_c0_entryhi_native(unsigned long long v) |
516 | 527 | { |
517 | | - long v = *(unsigned long *)a - *(unsigned long *)b; |
518 | | - int s = sizeof(long) > sizeof(int) ? sizeof(long) * 8 - 1: 0; |
519 | | - return s ? (v != 0) | v >> s : v; |
| 528 | + if (cpu_has_64bits) |
| 529 | + write_c0_entryhi_64(v); |
| 530 | + else |
| 531 | + write_c0_entryhi(v); |
520 | 532 | } |
521 | 533 |
|
| 534 | +/* TLB entry state for uniquification. */ |
| 535 | +struct tlbent { |
| 536 | + unsigned long long wired:1; |
| 537 | + unsigned long long global:1; |
| 538 | + unsigned long long asid:10; |
| 539 | + unsigned long long vpn:51; |
| 540 | + unsigned long long pagesz:5; |
| 541 | + unsigned long long index:14; |
| 542 | +}; |
| 543 | + |
522 | 544 | /* |
523 | | - * Initialise all TLB entries with unique values that do not clash with |
524 | | - * what we have been handed over and what we'll be using ourselves. |
| 545 | + * Comparison function for TLB entry sorting. Place wired entries first, |
| 546 | + * then global entries, then order by increasing VPN/ASID and by |
| 547 | + * decreasing page size. This lets us avoid clashes with wired entries |
| 548 | + * easily and get entries for larger pages out of the way first. |
| 549 | + * |
| 550 | + * We could group bits so as to reduce the number of comparisons, but this |
| 551 | + * is seldom executed and not performance-critical, so prefer legibility. |
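| | + * |
| | + * E.g. a wired 16MiB entry sorts ahead of any global entry, and a |
| | + * global 4KiB entry ahead of any regular entry, regardless of VPN. |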
525 | 552 | */ |
526 | | -static void __ref r4k_tlb_uniquify(void) |
| 553 | +static int r4k_entry_cmp(const void *a, const void *b) |
527 | 554 | { |
528 | | - int tlbsize = current_cpu_data.tlbsize; |
529 | | - bool use_slab = slab_is_available(); |
530 | | - int start = num_wired_entries(); |
531 | | - phys_addr_t tlb_vpn_size; |
532 | | - unsigned long *tlb_vpns; |
533 | | - unsigned long vpn_mask; |
534 | | - int cnt, ent, idx, i; |
535 | | - |
536 | | - vpn_mask = GENMASK(cpu_vmbits - 1, 13); |
537 | | - vpn_mask |= IS_ENABLED(CONFIG_64BIT) ? 3ULL << 62 : 1 << 31; |
| 555 | + struct tlbent ea = *(const struct tlbent *)a, eb = *(const struct tlbent *)b; |
| 556 | + |
| 557 | + if (ea.wired > eb.wired) |
| 558 | + return -1; |
| 559 | + else if (ea.wired < eb.wired) |
| 560 | + return 1; |
| 561 | + else if (ea.global > eb.global) |
| 562 | + return -1; |
| 563 | + else if (ea.global < eb.global) |
| 564 | + return 1; |
| 565 | + else if (ea.vpn < eb.vpn) |
| 566 | + return -1; |
| 567 | + else if (ea.vpn > eb.vpn) |
| 568 | + return 1; |
| 569 | + else if (ea.asid < eb.asid) |
| 570 | + return -1; |
| 571 | + else if (ea.asid > eb.asid) |
| 572 | + return 1; |
| 573 | + else if (ea.pagesz > eb.pagesz) |
| 574 | + return -1; |
| 575 | + else if (ea.pagesz < eb.pagesz) |
| 576 | + return 1; |
| 577 | + else |
| 578 | + return 0; |
| 579 | +} |
538 | 580 |
|
539 | | - tlb_vpn_size = tlbsize * sizeof(*tlb_vpns); |
540 | | - tlb_vpns = (use_slab ? |
541 | | - kmalloc(tlb_vpn_size, GFP_KERNEL) : |
542 | | - memblock_alloc_raw(tlb_vpn_size, sizeof(*tlb_vpns))); |
543 | | - if (WARN_ON(!tlb_vpns)) |
544 | | - return; /* Pray local_flush_tlb_all() is good enough. */ |
| 581 | +/* |
| 582 | + * Fetch all the TLB entries, masking each VPN value retrieved with the |
| 583 | + * corresponding page mask and ignoring any 1KiB extension, as we'll be |
| 584 | + * using 4KiB pages for uniquification. |
| 585 | + */ |
| 586 | +static void __ref r4k_tlb_uniquify_read(struct tlbent *tlb_vpns, int tlbsize) |
| 587 | +{ |
| 588 | + int start = num_wired_entries(); |
| 589 | + unsigned long long vpn_mask; |
| 590 | + bool global; |
| 591 | + int i; |
545 | 592 |
|
546 | | - htw_stop(); |
| 593 | + vpn_mask = GENMASK(current_cpu_data.vmbits - 1, VPN2_SHIFT); |
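| | + /* Also compare the region bits: EntryHi R (63:62), or bit 31 with 32-bit. */ |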
| 594 | + vpn_mask |= cpu_has_64bits ? 3ULL << 62 : 1 << 31; |
547 | 595 |
|
548 | | - for (i = start, cnt = 0; i < tlbsize; i++, cnt++) { |
549 | | - unsigned long vpn; |
| 596 | + for (i = 0; i < tlbsize; i++) { |
| 597 | + unsigned long long entryhi, vpn, mask, asid; |
| 598 | + unsigned int pagesz; |
550 | 599 |
|
551 | 600 | write_c0_index(i); |
552 | 601 | mtc0_tlbr_hazard(); |
553 | 602 | tlb_read(); |
554 | 603 | tlb_read_hazard(); |
555 | | - vpn = read_c0_entryhi(); |
556 | | - vpn &= vpn_mask & PAGE_MASK; |
557 | | - tlb_vpns[cnt] = vpn; |
558 | 604 |
|
559 | | - /* Prevent any large pages from overlapping regular ones. */ |
560 | | - write_c0_pagemask(read_c0_pagemask() & PM_DEFAULT_MASK); |
561 | | - mtc0_tlbw_hazard(); |
562 | | - tlb_write_indexed(); |
563 | | - tlbw_use_hazard(); |
| 605 | + global = !!(read_c0_entrylo0() & ENTRYLO_G); |
| 606 | + entryhi = read_c0_entryhi_native(); |
| 607 | + mask = read_c0_pagemask(); |
| 608 | + |
| 609 | + asid = entryhi & cpu_asid_mask(¤t_cpu_data); |
| 610 | + vpn = (entryhi & vpn_mask & ~mask) >> VPN2_SHIFT; |
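| | + /* Convert the page mask to log2 of the entry's span in 8KiB VPN2 steps. */ |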
| 611 | + pagesz = ilog2((mask >> VPN2_SHIFT) + 1); |
| 612 | + |
| 613 | + tlb_vpns[i].global = global; |
| 614 | + tlb_vpns[i].asid = global ? 0 : asid; |
| 615 | + tlb_vpns[i].vpn = vpn; |
| 616 | + tlb_vpns[i].pagesz = pagesz; |
| 617 | + tlb_vpns[i].wired = i < start; |
| 618 | + tlb_vpns[i].index = i; |
564 | 619 | } |
| 620 | +} |
565 | 621 |
|
566 | | - sort(tlb_vpns, cnt, sizeof(tlb_vpns[0]), r4k_vpn_cmp, NULL); |
| 622 | +/* |
| 623 | + * Write a unique value to each TLB entry other than the wired ones, |
| 624 | + * using the 4KiB page size. This size might not be supported with R6, |
| 625 | + * but EHINV is mandatory for R6, so we won't ever be called in that case. |
| 626 | + * |
| 627 | + * A sorted table is supplied with any wired entries at the beginning, |
| 628 | + * followed by any global entries, and then finally regular entries. |
| 629 | + * We start at the VPN and ASID values of zero and only assign user |
| 630 | + * addresses, therefore guaranteeing no clash with addresses produced |
| 631 | + * by UNIQUE_ENTRYHI. We avoid any VPN values used by wired or global |
| 632 | + * entries by increasing the VPN value beyond the span of such an entry. |
| 633 | + * |
| 634 | + * When a VPN/ASID clash is found with a regular entry, we instead |
| 635 | + * increment the ASID until no VPN/ASID clash remains or the ASID space |
| 636 | + * has been exhausted, in which case we increase the VPN value beyond |
| 637 | + * the span of the largest clashing entry. |
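| | + * |
| | + * For example, with an 8-bit ASID space, a VPN clashing at all of |
| | + * ASIDs 0-255 has exhausted the ASID space and is moved just beyond |
| | + * the span of the largest clashing entry, restarting at ASID 0. |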
| 638 | + * |
| 639 | + * We do not need to be concerned about FTLB or MMID configurations as |
| 640 | + * those are required to implement the EHINV feature. |
| 641 | + */ |
| 642 | +static void __ref r4k_tlb_uniquify_write(struct tlbent *tlb_vpns, int tlbsize) |
| 643 | +{ |
| 644 | + unsigned long long asid, vpn, vpn_size, pagesz; |
| 645 | + int widx, gidx, idx, sidx, lidx, i; |
567 | 646 |
|
568 | | - write_c0_pagemask(PM_DEFAULT_MASK); |
| 647 | + vpn_size = 1ULL << (current_cpu_data.vmbits - VPN2_SHIFT); |
| 648 | + pagesz = ilog2((PM_4K >> VPN2_SHIFT) + 1); |
| 649 | + |
| 650 | + write_c0_pagemask(PM_4K); |
569 | 651 | write_c0_entrylo0(0); |
570 | 652 | write_c0_entrylo1(0); |
571 | 653 |
|
572 | | - idx = 0; |
573 | | - ent = tlbsize; |
574 | | - for (i = start; i < tlbsize; i++) |
575 | | - while (1) { |
576 | | - unsigned long entryhi, vpn; |
| 654 | + asid = 0; |
| 655 | + vpn = 0; |
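| | + /* |
| | + * The sorted table holds wired entries in [0, sidx), global ones in |
| | + * [sidx, lidx) and regular ones from lidx on; widx, gidx and idx |
| | + * track the next entry of each kind to avoid. |
| | + */ |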
| 656 | + widx = 0; |
| 657 | + for (sidx = 0; sidx < tlbsize && tlb_vpns[sidx].wired; sidx++) |
| 658 | + ; |
| 659 | + gidx = sidx; |
| 660 | + for (lidx = sidx; lidx < tlbsize && tlb_vpns[lidx].global; lidx++) |
| 661 | + ; |
| 662 | + idx = lidx; |
| 663 | + for (i = sidx; i < tlbsize; i++) { |
| 664 | + unsigned long long entryhi, vpn_pagesz = 0; |
577 | 665 |
|
578 | | - entryhi = UNIQUE_ENTRYHI(ent); |
579 | | - vpn = entryhi & vpn_mask & PAGE_MASK; |
| 666 | + while (1) { |
| 667 | + if (WARN_ON(vpn >= vpn_size)) { |
| 668 | + dump_tlb_all(); |
| 669 | + /* Pray local_flush_tlb_all() will cope. */ |
| 670 | + return; |
| 671 | + } |
580 | 672 |
|
581 | | - if (idx >= cnt || vpn < tlb_vpns[idx]) { |
582 | | - write_c0_entryhi(entryhi); |
583 | | - write_c0_index(i); |
584 | | - mtc0_tlbw_hazard(); |
585 | | - tlb_write_indexed(); |
586 | | - ent++; |
587 | | - break; |
588 | | - } else if (vpn == tlb_vpns[idx]) { |
589 | | - ent++; |
590 | | - } else { |
| 673 | + /* VPN must be below the next wired entry. */ |
| 674 | + if (widx < sidx && vpn >= tlb_vpns[widx].vpn) { |
| 675 | + vpn = max(vpn, |
| 676 | + (tlb_vpns[widx].vpn + |
| 677 | + (1ULL << tlb_vpns[widx].pagesz))); |
| 678 | + asid = 0; |
| 679 | + widx++; |
| 680 | + continue; |
| 681 | + } |
| 682 | + /* VPN must be below the next global entry. */ |
| 683 | + if (gidx < lidx && vpn >= tlb_vpns[gidx].vpn) { |
| 684 | + vpn = max(vpn, |
| 685 | + (tlb_vpns[gidx].vpn + |
| 686 | + (1ULL << tlb_vpns[gidx].pagesz))); |
| 687 | + asid = 0; |
| 688 | + gidx++; |
| 689 | + continue; |
| 690 | + } |
| 691 | + /* Try to find a free ASID so as to conserve VPNs. */ |
| 692 | + if (idx < tlbsize && vpn == tlb_vpns[idx].vpn && |
| 693 | + asid == tlb_vpns[idx].asid) { |
| 694 | + unsigned long long idx_pagesz; |
| 695 | + |
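| | + /* Track the largest page span clashing at this VPN. */ |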
| 696 | + idx_pagesz = tlb_vpns[idx].pagesz; |
| 697 | + vpn_pagesz = max(vpn_pagesz, idx_pagesz); |
| 698 | + do |
| 699 | + idx++; |
| 700 | + while (idx < tlbsize && |
| 701 | + vpn == tlb_vpns[idx].vpn && |
| 702 | + asid == tlb_vpns[idx].asid); |
| 703 | + asid++; |
| 704 | + if (asid > cpu_asid_mask(¤t_cpu_data)) { |
| 705 | + vpn += 1ULL << vpn_pagesz; |
| 706 | + asid = 0; |
| 707 | + vpn_pagesz = 0; |
| 708 | + } |
| 709 | + continue; |
| 710 | + } |
| 711 | + /* VPN mustn't be above the next regular entry. */ |
| 712 | + if (idx < tlbsize && vpn > tlb_vpns[idx].vpn) { |
| 713 | + vpn = max(vpn, |
| 714 | + (tlb_vpns[idx].vpn + |
| 715 | + (1ULL << tlb_vpns[idx].pagesz))); |
| 716 | + asid = 0; |
591 | 717 | idx++; |
| 718 | + continue; |
592 | 719 | } |
| 720 | + break; |
593 | 721 | } |
594 | 722 |
|
| 723 | + entryhi = (vpn << VPN2_SHIFT) | asid; |
| 724 | + write_c0_entryhi_native(entryhi); |
| 725 | + write_c0_index(tlb_vpns[i].index); |
| 726 | + mtc0_tlbw_hazard(); |
| 727 | + tlb_write_indexed(); |
| 728 | + |
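| | + /* Record the values written so later clash checks see them. */ |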
| 729 | + tlb_vpns[i].asid = asid; |
| 730 | + tlb_vpns[i].vpn = vpn; |
| 731 | + tlb_vpns[i].pagesz = pagesz; |
| 732 | + |
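| | + /* Cycle through the whole ASID space before consuming another VPN. */ |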
| 733 | + asid++; |
| 734 | + if (asid > cpu_asid_mask(¤t_cpu_data)) { |
| 735 | + vpn += 1ULL << pagesz; |
| 736 | + asid = 0; |
| 737 | + } |
| 738 | + } |
| 739 | +} |
| 740 | + |
| 741 | +/* |
| 742 | + * Initialise all TLB entries with unique values that do not clash with |
| 743 | + * what we have been handed over and what we'll be using ourselves. |
| 744 | + */ |
| 745 | +static void __ref r4k_tlb_uniquify(void) |
| 746 | +{ |
| 747 | + int tlbsize = current_cpu_data.tlbsize; |
| 748 | + bool use_slab = slab_is_available(); |
| 749 | + phys_addr_t tlb_vpn_size; |
| 750 | + struct tlbent *tlb_vpns; |
| 751 | + |
| 752 | + tlb_vpn_size = tlbsize * sizeof(*tlb_vpns); |
| 753 | + tlb_vpns = (use_slab ? |
| 754 | + kmalloc(tlb_vpn_size, GFP_KERNEL) : |
| 755 | + memblock_alloc_raw(tlb_vpn_size, sizeof(*tlb_vpns))); |
| 756 | + if (WARN_ON(!tlb_vpns)) |
| 757 | + return; /* Pray local_flush_tlb_all() is good enough. */ |
| 758 | + |
| 759 | + htw_stop(); |
| 760 | + |
| 761 | + r4k_tlb_uniquify_read(tlb_vpns, tlbsize); |
| 762 | + |
| 763 | + sort(tlb_vpns, tlbsize, sizeof(*tlb_vpns), r4k_entry_cmp, NULL); |
| 764 | + |
| 765 | + r4k_tlb_uniquify_write(tlb_vpns, tlbsize); |
| 766 | + |
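| | + /* Restore the default page mask clobbered above. */ |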
| 767 | + write_c0_pagemask(PM_DEFAULT_MASK); |
| 768 | + |
595 | 769 | tlbw_use_hazard(); |
596 | 770 | htw_start(); |
597 | 771 | flush_micro_tlb(); |
|