|
32 | 32 | "import tensorflow_hub as hub\n", |
33 | 33 | "from datetime import datetime\n", |
34 | 34 | "import requests\n", |
35 | | - "from copy import deepcopy\n", |
36 | 35 | "print(\"We are using Tensorflow version: \", tf.__version__)" |
37 | 36 | ] |
38 | 37 | }, |
|
443 | 442 | "id": "8a03faef", |
444 | 443 | "metadata": {}, |
445 | 444 | "source": [ |
446 | | - "Let's measure the performance of the model we just saved using the `tf_benchmark.py` script that runs inference on dummy data." |
| 445 | + "Let's measure the performance of the model we just saved using the `tf_benchmark.py` script that runs inference on dummy data.\n", |
| 446 | + "\n", |
| 447 | + "_Note: We only use the auto-mixed precision policy if the underlying system is the 4th Gen Intel® Xeon® scalable processor (codenamed Sapphire Rapids)_" |
| 448 | + ] |
| 449 | + }, |
| 450 | + { |
| 451 | + "cell_type": "code", |
| 452 | + "execution_count": null, |
| 453 | + "id": "db6aa4b4", |
| 454 | + "metadata": {}, |
| 455 | + "outputs": [], |
| 456 | + "source": [ |
| 457 | + "if arch == 'SPR':\n", |
| 458 | + " PRECISION = \"bfloat16\"\n", |
| 459 | + "else:\n", |
| 460 | + " PRECISION = \"float32\"\n", |
| 461 | + "print(\"Precision for inference: \", PRECISION)" |
447 | 462 | ] |
448 | 463 | }, |
449 | 464 | { |
450 | 465 | "cell_type": "code", |
451 | 466 | "execution_count": null, |
452 | 467 | "id": "fd855747", |
453 | | - "metadata": { |
454 | | - "scrolled": false |
455 | | - }, |
| 468 | + "metadata": {}, |
456 | 469 | "outputs": [], |
457 | 470 | "source": [ |
458 | | - "run scripts/tf_benchmark.py --model_path models/my_saved_model --num_warmup 5 --num_iter 50 --precision float32 --batch_size 32 --disable_optimize" |
| 471 | + "!python scripts/tf_benchmark.py --model_path models/my_saved_model --num_warmup 5 --num_iter 50 --precision {PRECISION} --batch_size 32 --disable_optimize" |
459 | 472 | ] |
460 | 473 | }, |
461 | 474 | { |
|
486 | 499 | "metadata": {}, |
487 | 500 | "outputs": [], |
488 | 501 | "source": [ |
489 | | - "run scripts/freeze_optimize_v2.py --input_saved_model_dir=models/my_saved_model --output_saved_model_dir=models/my_optimized_model" |
| 502 | + "!python scripts/freeze_optimize_v2.py --input_saved_model_dir=models/my_saved_model --output_saved_model_dir=models/my_optimized_model" |
490 | 503 | ] |
491 | 504 | }, |
492 | 505 | { |
|
501 | 514 | "cell_type": "code", |
502 | 515 | "execution_count": null, |
503 | 516 | "id": "480dddda", |
504 | | - "metadata": { |
505 | | - "scrolled": false |
506 | | - }, |
| 517 | + "metadata": {}, |
507 | 518 | "outputs": [], |
508 | 519 | "source": [ |
509 | | - "run scripts/tf_benchmark.py --model_path models/my_optimized_model --num_warmup 5 --num_iter 50 --precision float32 --batch_size 32" |
| 520 | + "!python scripts/tf_benchmark.py --model_path models/my_optimized_model --num_warmup 5 --num_iter 50 --precision {PRECISION} --batch_size 32" |
510 | 521 | ] |
511 | 522 | }, |
512 | 523 | { |
|
526 | 537 | "metadata": {}, |
527 | 538 | "outputs": [], |
528 | 539 | "source": [ |
529 | | - "run scripts/plot.py" |
530 | | - ] |
531 | | - }, |
532 | | - { |
533 | | - "cell_type": "markdown", |
534 | | - "id": "8157a5ec", |
535 | | - "metadata": {}, |
536 | | - "source": [ |
537 | | - "### TensorFlow Serving\n", |
538 | | - "\n", |
539 | | - "In this section, we will initialize and run TensorFlow Serving natively to serve our retrained model." |
540 | | - ] |
541 | | - }, |
542 | | - { |
543 | | - "cell_type": "code", |
544 | | - "execution_count": null, |
545 | | - "id": "6a00c32d", |
546 | | - "metadata": {}, |
547 | | - "outputs": [], |
548 | | - "source": [ |
549 | | - "!mkdir serving\n", |
550 | | - "!cp -r models/my_optimized_model serving/1" |
551 | | - ] |
552 | | - }, |
553 | | - { |
554 | | - "cell_type": "code", |
555 | | - "execution_count": null, |
556 | | - "id": "a45b5438", |
557 | | - "metadata": {}, |
558 | | - "outputs": [], |
559 | | - "source": [ |
560 | | - "os.environ[\"MODEL_DIR\"] = os.getcwd() + \"/serving\"" |
561 | | - ] |
562 | | - }, |
563 | | - { |
564 | | - "cell_type": "markdown", |
565 | | - "id": "edcd77c4", |
566 | | - "metadata": {}, |
567 | | - "source": [ |
568 | | - "This is where we start running TensorFlow Serving and load our model. After it loads we can start making inference requests using REST. There are some important parameters:\n", |
569 | | - "- **rest_api_port**: The port that you'll use for REST requests.\n", |
570 | | - "- **model_name**: You'll use this in the URL of REST requests. It can be anything.\n", |
571 | | - "- **model_base_path**: This is the path to the directory where you've saved your model." |
572 | | - ] |
573 | | - }, |
574 | | - { |
575 | | - "cell_type": "code", |
576 | | - "execution_count": null, |
577 | | - "id": "34aee14f", |
578 | | - "metadata": {}, |
579 | | - "outputs": [], |
580 | | - "source": [ |
581 | | - "%%bash --bg\n", |
582 | | - "nohup tensorflow_model_server --rest_api_port=8501 --model_name=rn50 --model_base_path=${MODEL_DIR} > server.log 2>&1" |
583 | | - ] |
584 | | - }, |
585 | | - { |
586 | | - "cell_type": "code", |
587 | | - "execution_count": null, |
588 | | - "id": "e486894a", |
589 | | - "metadata": {}, |
590 | | - "outputs": [], |
591 | | - "source": [ |
592 | | - "!tail server.log" |
593 | | - ] |
594 | | - }, |
595 | | - { |
596 | | - "cell_type": "markdown", |
597 | | - "id": "7dc7606d", |
598 | | - "metadata": {}, |
599 | | - "source": [ |
600 | | - "**Prepare the testing data for prediction**" |
| 540 | + "!python scripts/plot.py" |
601 | 541 | ] |
602 | 542 | }, |
603 | 543 | { |
604 | 544 | "cell_type": "code", |
605 | 545 | "execution_count": null, |
606 | | - "id": "c9dfa9d8", |
| 546 | + "id": "7c1bd119-ffc1-4761-a614-c2ffd83e6b4c", |
607 | 547 | "metadata": {}, |
608 | 548 | "outputs": [], |
609 | | - "source": [ |
610 | | - "for image_batch, labels_batch in val_ds:\n", |
611 | | - " print(image_batch.shape)\n", |
612 | | - " print(labels_batch.shape)\n", |
613 | | - " break\n", |
614 | | - "test_data, test_labels = image_batch.numpy(), labels_batch.numpy()" |
615 | | - ] |
616 | | - }, |
617 | | - { |
618 | | - "cell_type": "markdown", |
619 | | - "id": "5d4e5f62", |
620 | | - "metadata": {}, |
621 | | - "source": [ |
622 | | - "First, let's take a look at a random example from our test data." |
623 | | - ] |
624 | | - }, |
625 | | - { |
626 | | - "cell_type": "code", |
627 | | - "execution_count": null, |
628 | | - "id": "e2761dcf", |
629 | | - "metadata": {}, |
630 | | - "outputs": [], |
631 | | - "source": [ |
632 | | - "import matplotlib.pyplot as plt\n", |
633 | | - "\n", |
634 | | - "def show(idx, title):\n", |
635 | | - " plt.figure()\n", |
636 | | - " plt.imshow(test_data[idx])\n", |
637 | | - " plt.axis('off')\n", |
638 | | - " plt.title('\\n\\n{}'.format(title), fontdict={'size': 16})\n", |
639 | | - "\n", |
640 | | - "import random\n", |
641 | | - "rando = random.randint(0,test_data.shape[0]-1)\n", |
642 | | - "show(rando, 'An Example Image:')" |
643 | | - ] |
644 | | - }, |
645 | | - { |
646 | | - "cell_type": "markdown", |
647 | | - "id": "3b362658", |
648 | | - "metadata": {}, |
649 | | - "source": [ |
650 | | - "#### Make a request to your model in TensorFlow Serving\n", |
651 | | - "\n", |
652 | | - "Now let's create the JSON object for a batch of three inference requests, and see how well our model recognizes things:" |
653 | | - ] |
654 | | - }, |
655 | | - { |
656 | | - "cell_type": "code", |
657 | | - "execution_count": null, |
658 | | - "id": "831bf2d1", |
659 | | - "metadata": { |
660 | | - "scrolled": true |
661 | | - }, |
662 | | - "outputs": [], |
663 | | - "source": [ |
664 | | - "import json\n", |
665 | | - "data = json.dumps({\"signature_name\": \"serving_default\", \"instances\": test_data[0:3].tolist()})\n", |
666 | | - "print('Data: {} ... {}'.format(data[:50], data[len(data)-52:]))" |
667 | | - ] |
668 | | - }, |
669 | | - { |
670 | | - "cell_type": "markdown", |
671 | | - "id": "427f3c8b", |
672 | | - "metadata": {}, |
673 | | - "source": [ |
674 | | - "#### Make REST requests\n", |
675 | | - "\n", |
676 | | - "We'll send a predict request as a POST to our server's REST endpoint, and pass it three examples." |
677 | | - ] |
678 | | - }, |
679 | | - { |
680 | | - "cell_type": "code", |
681 | | - "execution_count": null, |
682 | | - "id": "3d7f5e5e", |
683 | | - "metadata": {}, |
684 | | - "outputs": [], |
685 | | - "source": [ |
686 | | - "headers = {\"content-type\": \"application/json\"}\n", |
687 | | - "json_response = requests.post('http://localhost:8501/v1/models/rn50:predict', data=data, headers=headers)\n", |
688 | | - "predictions = json.loads(json_response.text)['predictions']\n", |
689 | | - "\n", |
690 | | - "for i in range(0,3):\n", |
691 | | - " show(i, 'The model thought this was a {} (class {}), and it was actually a {} (class {})'.format(\n", |
692 | | - " class_names[np.argmax(predictions[i])], np.argmax(predictions[i]), class_names[test_labels[i]], test_labels[i]))" |
693 | | - ] |
| 549 | + "source": [] |
694 | 550 | } |
695 | 551 | ], |
696 | 552 | "metadata": { |
| 553 | + "kernelspec": { |
| 554 | + "display_name": "Python 3 (ipykernel)", |
| 555 | + "language": "python", |
| 556 | + "name": "python3" |
| 557 | + }, |
697 | 558 | "language_info": { |
698 | 559 | "codemirror_mode": { |
699 | 560 | "name": "ipython", |
|
704 | 565 | "name": "python", |
705 | 566 | "nbconvert_exporter": "python", |
706 | 567 | "pygments_lexer": "ipython3", |
707 | | - "version": "3.8.12" |
| 568 | + "version": "3.10.12" |
708 | 569 | } |
709 | 570 | }, |
710 | 571 | "nbformat": 4, |
|
0 commit comments