|
1465 | 1465 | }
|
1466 | 1466 | }
|
1467 | 1467 | },
|
1468 |
| - "gpt_j_6b_clm_weight_only": { |
| 1468 | + "gpt_j_6b_clm_woq": { |
1469 | 1469 | "working_dir": "huggingface/pytorch/language-modeling/quantization",
|
1470 | 1470 | "tune": {
|
1471 | 1471 | "cmd": "bash run_tuning.sh",
|
1472 | 1472 | "params": {
|
1473 |
| - "topology": "gpt_j_weight_only", |
| 1473 | + "topology": "gpt_j_woq", |
1474 | 1474 | "task": "clm",
|
1475 | 1475 | "approach": "weight_only",
|
1476 | 1476 | "output_model": "saved_results"
|
|
1479 | 1479 | "benchmark": {
|
1480 | 1480 | "cmd": "bash run_benchmark.sh",
|
1481 | 1481 | "params": {
|
1482 |
| - "topology": "gpt_j_weight_only", |
| 1482 | + "topology": "gpt_j_woq", |
1483 | 1483 | "task": "clm",
|
1484 | 1484 | "mode": "accuracy",
|
1485 | 1485 | "batch_size": "112",
|
|
1489 | 1489 | }
|
1490 | 1490 | }
|
1491 | 1491 | },
|
1492 |
| - "gpt_j_6b_clm_weight_only_awq": { |
| 1492 | + "gpt_j_6b_clm_woq_awq": { |
1493 | 1493 | "working_dir": "huggingface/pytorch/language-modeling/quantization",
|
1494 | 1494 | "tune": {
|
1495 | 1495 | "cmd": "bash run_tuning.sh",
|
1496 | 1496 | "params": {
|
1497 |
| - "topology": "gpt_j_weight_only_awq", |
| 1497 | + "topology": "gpt_j_woq_awq", |
1498 | 1498 | "task": "clm",
|
1499 | 1499 | "approach": "weight_only",
|
1500 | 1500 | "output_model": "saved_results"
|
|
1503 | 1503 | "benchmark": {
|
1504 | 1504 | "cmd": "bash run_benchmark.sh",
|
1505 | 1505 | "params": {
|
1506 |
| - "topology": "gpt_j_weight_only_awq", |
| 1506 | + "topology": "gpt_j_woq_awq", |
1507 | 1507 | "task": "clm",
|
1508 | 1508 | "mode": "accuracy",
|
1509 | 1509 | "batch_size": "112",
|
|
1592 | 1592 | }
|
1593 | 1593 | }
|
1594 | 1594 | },
|
1595 |
| - "opt_125m_clm_weight_only": { |
| 1595 | + "chatglm_clm_woq": { |
1596 | 1596 | "working_dir": "huggingface/pytorch/language-modeling/quantization",
|
1597 | 1597 | "tune": {
|
1598 | 1598 | "cmd": "bash run_tuning.sh",
|
1599 | 1599 | "params": {
|
1600 |
| - "topology": "opt_125m_weight_only", |
| 1600 | + "topology": "chatglm_woq", |
1601 | 1601 | "task": "clm",
|
1602 | 1602 | "approach": "weight_only",
|
1603 | 1603 | "output_model": "saved_results"
|
|
1606 | 1606 | "benchmark": {
|
1607 | 1607 | "cmd": "bash run_benchmark.sh",
|
1608 | 1608 | "params": {
|
1609 |
| - "topology": "opt_125m_weight_only", |
| 1609 | + "topology": "chatglm_woq", |
1610 | 1610 | "task": "clm",
|
1611 | 1611 | "mode": "accuracy",
|
1612 | 1612 | "batch_size": "112",
|
|
1616 | 1616 | }
|
1617 | 1617 | }
|
1618 | 1618 | },
|
1619 |
| - "opt_125m_clm_weight_only_awq": { |
| 1619 | + "opt_125m_clm_woq": { |
1620 | 1620 | "working_dir": "huggingface/pytorch/language-modeling/quantization",
|
1621 | 1621 | "tune": {
|
1622 | 1622 | "cmd": "bash run_tuning.sh",
|
1623 | 1623 | "params": {
|
1624 |
| - "topology": "opt_125m_weight_only_awq", |
| 1624 | + "topology": "opt_125m_woq", |
1625 | 1625 | "task": "clm",
|
1626 | 1626 | "approach": "weight_only",
|
1627 | 1627 | "output_model": "saved_results"
|
|
1630 | 1630 | "benchmark": {
|
1631 | 1631 | "cmd": "bash run_benchmark.sh",
|
1632 | 1632 | "params": {
|
1633 |
| - "topology": "opt_125m_weight_only_awq", |
| 1633 | + "topology": "opt_125m_woq", |
1634 | 1634 | "task": "clm",
|
1635 | 1635 | "mode": "accuracy",
|
1636 | 1636 | "batch_size": "112",
|
|
1640 | 1640 | }
|
1641 | 1641 | }
|
1642 | 1642 | },
|
1643 |
| - "chatglm_clm_weight_only": { |
| 1643 | + "opt_125m_clm_woq_awq": { |
1644 | 1644 | "working_dir": "huggingface/pytorch/language-modeling/quantization",
|
1645 | 1645 | "tune": {
|
1646 | 1646 | "cmd": "bash run_tuning.sh",
|
1647 | 1647 | "params": {
|
1648 |
| - "topology": "chatglm_weight_only", |
| 1648 | + "topology": "opt_125m_woq_awq", |
1649 | 1649 | "task": "clm",
|
1650 | 1650 | "approach": "weight_only",
|
1651 | 1651 | "output_model": "saved_results"
|
|
1654 | 1654 | "benchmark": {
|
1655 | 1655 | "cmd": "bash run_benchmark.sh",
|
1656 | 1656 | "params": {
|
1657 |
| - "topology": "chatglm_weight_only", |
| 1657 | + "topology": "opt_125m_woq_awq", |
1658 | 1658 | "task": "clm",
|
1659 | 1659 | "mode": "accuracy",
|
1660 | 1660 | "batch_size": "112",
|
|
1664 | 1664 | }
|
1665 | 1665 | }
|
1666 | 1666 | },
|
| 1667 | + "opt_125m_clm_woq_gptq": { |
| 1668 | + "working_dir": "huggingface/pytorch/language-modeling/quantization", |
| 1669 | + "tune": { |
| 1670 | + "cmd": "bash run_tuning.sh", |
| 1671 | + "params": { |
| 1672 | + "topology": "opt_125m_woq_gptq", |
| 1673 | + "task": "clm", |
| 1674 | + "approach": "weight_only", |
| 1675 | + "output_model": "saved_results" |
| 1676 | + } |
| 1677 | + }, |
| 1678 | + "benchmark": { |
| 1679 | + "cmd": "bash run_benchmark.sh", |
| 1680 | + "params": { |
| 1681 | + "topology": "opt_125m_woq_gptq", |
| 1682 | + "task": "clm", |
| 1683 | + "mode": "accuracy", |
| 1684 | + "batch_size": "112", |
| 1685 | + "config": "saved_results", |
| 1686 | + "iters": "100", |
| 1687 | + "int8": "false" |
| 1688 | + } |
| 1689 | + } |
| 1690 | + }, |
| 1691 | + "opt_125m_clm_woq_teq": { |
| 1692 | + "working_dir": "huggingface/pytorch/language-modeling/quantization", |
| 1693 | + "tune": { |
| 1694 | + "cmd": "bash run_tuning.sh", |
| 1695 | + "params": { |
| 1696 | + "topology": "opt_125m_woq_teq", |
| 1697 | + "task": "clm", |
| 1698 | + "approach": "weight_only", |
| 1699 | + "output_model": "saved_results" |
| 1700 | + } |
| 1701 | + }, |
| 1702 | + "benchmark": { |
| 1703 | + "cmd": "bash run_benchmark.sh", |
| 1704 | + "params": { |
| 1705 | + "topology": "opt_125m_woq_teq", |
| 1706 | + "task": "clm", |
| 1707 | + "mode": "accuracy", |
| 1708 | + "batch_size": "112", |
| 1709 | + "config": "saved_results", |
| 1710 | + "iters": "100", |
| 1711 | + "int8": "false" |
| 1712 | + } |
| 1713 | + } |
| 1714 | + }, |
| 1715 | + "opt_125m_clm_ipex": { |
| 1716 | + "working_dir": "huggingface/pytorch/language-modeling/quantization", |
| 1717 | + "tune": { |
| 1718 | + "cmd": "bash run_tuning.sh", |
| 1719 | + "params": { |
| 1720 | + "topology": "opt_125m", |
| 1721 | + "task": "clm", |
| 1722 | + "approach": "static", |
| 1723 | + "backend": "ipex", |
| 1724 | + "output_model": "saved_results" |
| 1725 | + } |
| 1726 | + }, |
| 1727 | + "benchmark": { |
| 1728 | + "cmd": "bash run_benchmark.sh", |
| 1729 | + "params": { |
| 1730 | + "topology": "opt_125m", |
| 1731 | + "task": "clm", |
| 1732 | + "approach": "static", |
| 1733 | + "backend": "ipex", |
| 1734 | + "mode": "accuracy", |
| 1735 | + "batch_size": "112", |
| 1736 | + "iters": "100", |
| 1737 | + "int8": "false", |
| 1738 | + "config": "saved_results" |
| 1739 | + } |
| 1740 | + } |
| 1741 | + }, |
1667 | 1742 | "opt_1.3b_clm_ipex": {
|
1668 | 1743 | "working_dir": "huggingface/pytorch/language-modeling/quantization",
|
1669 | 1744 | "tune": {
|
|
0 commit comments