Source: https://pytorch.org/tutorials/advanced/dynamic_quantization_tutorial.html
Author: James Reed
Edited by: Seth Weidman
Quantization involves converting a model's weights and activations from float to int, which can yield a smaller model size and faster inference with only a small impact on accuracy.
In this tutorial, we apply the simplest form of quantization, dynamic quantization, to an LSTM-based next-word prediction model, closely following the word language model from the PyTorch examples.
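Before diving in, here is a minimal sketch (not part of the original tutorial) of what "float to int8" means numerically: each float value is mapped to an 8-bit integer through a scale and zero point, and can be mapped back with only a small rounding error. The scale of 0.1 below is an arbitrary illustrative choice.

import torch

x = torch.randn(4)
# scale=0.1, zero_point=0: representable values are multiples of 0.1
xq = torch.quantize_per_tensor(x, 0.1, 0, torch.qint8)
print(x)                # original float32 values
print(xq.int_repr())    # the int8 values actually stored
print(xq.dequantize())  # reconstructed floats (x rounded to the nearest 0.1)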
# imports
import os
from io import open
import time
import torch
import torch.nn as nn
import torch.nn.functional as F
Here we define the LSTM model architecture, following the model in the word language model example.
class LSTMModel(nn.Module):
    """Container module with an encoder, a recurrent module, and a decoder."""

    def __init__(self, ntoken, ninp, nhid, nlayers, dropout=0.5):
        super(LSTMModel, self).__init__()
        self.drop = nn.Dropout(dropout)
        self.encoder = nn.Embedding(ntoken, ninp)
        self.rnn = nn.LSTM(ninp, nhid, nlayers, dropout=dropout)
        self.decoder = nn.Linear(nhid, ntoken)

        self.init_weights()

        self.nhid = nhid
        self.nlayers = nlayers

    def init_weights(self):
        initrange = 0.1
        self.encoder.weight.data.uniform_(-initrange, initrange)
        self.decoder.bias.data.zero_()
        self.decoder.weight.data.uniform_(-initrange, initrange)

    def forward(self, input, hidden):
        emb = self.drop(self.encoder(input))
        output, hidden = self.rnn(emb, hidden)
        output = self.drop(output)
        decoded = self.decoder(output)
        return decoded, hidden

    def init_hidden(self, bsz):
        weight = next(self.parameters())
        return (weight.new_zeros(self.nlayers, bsz, self.nhid),
                weight.new_zeros(self.nlayers, bsz, self.nhid))
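As a quick sanity check on the architecture (not part of the original tutorial; tmp_model and its tiny sizes are made up for illustration), a single token passed through an untrained instance should produce one logit per vocabulary word:

# Illustration only: one token in, one logit per vocabulary word out.
tmp_model = LSTMModel(ntoken=10, ninp=8, nhid=8, nlayers=2)
tmp_hidden = tmp_model.init_hidden(bsz=1)
tmp_out, tmp_hidden = tmp_model(torch.zeros(1, 1, dtype=torch.long), tmp_hidden)
print(tmp_out.shape)  # torch.Size([1, 1, 10])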
Next, again following the preprocessing in the word language model example, we load the Wikitext-2 dataset into a Corpus.
class Dictionary(object):
    def __init__(self):
        self.word2idx = {}
        self.idx2word = []

    def add_word(self, word):
        if word not in self.word2idx:
            self.idx2word.append(word)
            self.word2idx[word] = len(self.idx2word) - 1
        return self.word2idx[word]

    def __len__(self):
        return len(self.idx2word)


class Corpus(object):
    def __init__(self, path):
        self.dictionary = Dictionary()
        self.train = self.tokenize(os.path.join(path, 'train.txt'))
        self.valid = self.tokenize(os.path.join(path, 'valid.txt'))
        self.test = self.tokenize(os.path.join(path, 'test.txt'))

    def tokenize(self, path):
        """Tokenizes a text file."""
        assert os.path.exists(path)
        # Add words to the dictionary
        with open(path, 'r', encoding="utf8") as f:
            for line in f:
                words = line.split() + ['<eos>']
                for word in words:
                    self.dictionary.add_word(word)

        # Tokenize file content
        with open(path, 'r', encoding="utf8") as f:
            idss = []
            for line in f:
                words = line.split() + ['<eos>']
                ids = []
                for word in words:
                    ids.append(self.dictionary.word2idx[word])
                idss.append(torch.tensor(ids).type(torch.int64))
            ids = torch.cat(idss)

        return ids
model_data_filepath = 'data/'
corpus = Corpus(model_data_filepath + 'wikitext-2')
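As a quick illustration (not part of the original tutorial), the Dictionary assigns each unique token an integer id, and tokenize() turns each text file into a single one-dimensional tensor of those ids:

# Illustrative check only.
print(len(corpus.dictionary))                  # vocabulary size, used as ntokens below
print(corpus.train.shape, corpus.train.dtype)  # one long 1-D int64 tensor of token ids
print(corpus.dictionary.idx2word[:5])          # the first few tokens seen in train.txt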
This is a tutorial on dynamic quantization, a quantization technique that is applied after a model has been trained. Therefore, we simply load some pretrained weights into this model architecture; these weights were obtained by training for five epochs using the default settings in the word language model example.
ntokens = len(corpus.dictionary)
model = LSTMModel(
    ntoken = ntokens,
    ninp = 512,
    nhid = 256,
    nlayers = 5,
)

model.load_state_dict(
    torch.load(
        model_data_filepath + 'word_language_model_quantize.pth',
        map_location=torch.device('cpu')
    )
)
model.eval()
print(model)
Out:
LSTMModel(
(drop): Dropout(p=0.5, inplace=False)
(encoder): Embedding(33278, 512)
(rnn): LSTM(512, 256, num_layers=5, dropout=0.5)
(decoder): Linear(in_features=256, out_features=33278, bias=True)
)
Now let's generate some text to make sure the pretrained model is working properly; as before, we follow the word language model example here.
input_ = torch.randint(ntokens, (1, 1), dtype=torch.long)
hidden = model.init_hidden(1)
temperature = 1.0
num_words = 1000
with open(model_data_filepath + 'out.txt', 'w') as outf:
    with torch.no_grad():  # no tracking history
        for i in range(num_words):
            output, hidden = model(input_, hidden)
            word_weights = output.squeeze().div(temperature).exp().cpu()
            word_idx = torch.multinomial(word_weights, 1)[0]
            input_.fill_(word_idx)

            word = corpus.dictionary.idx2word[word_idx]

            outf.write(str(word.encode('utf-8')) + ('\n' if i % 20 == 19 else ' '))

            if i % 100 == 0:
                print('| Generated {}/{} words'.format(i, 1000))

with open(model_data_filepath + 'out.txt', 'r') as outf:
    all_output = outf.read()
    print(all_output)
Out:
| Generated 0/1000 words
| Generated 100/1000 words
| Generated 200/1000 words
| Generated 300/1000 words
| Generated 400/1000 words
| Generated 500/1000 words
| Generated 600/1000 words
| Generated 700/1000 words
| Generated 800/1000 words
| Generated 900/1000 words
b'and' b'O' b'\xe2\x80\x99' b'Gacy' b',' b'and' b'then' b'defined' b'that' b'next' b'novel' b'succeeded' b'large' b'property' b',' b'so' b'neither' b'number' b'is' b'currently'
b'a' b'identical' b'planet' b'by' b'stiff' b'culture' b'.' b'Mosley' b'may' b'settle' b'in' b'non' b'@-@' b'bands' b'for' b'the' b'beginning' b'of' b'its' b'home'
b'stations' b',' b'being' b'also' b'in' b'charge' b'for' b'two' b'other' b'@-@' b'month' b'ceremonies' b'.' b'The' b'first' b'Star' b'Overseas' b'took' b'to' b'have'
b'met' b'its' b'leadership' b'for' b'investigation' b'such' b'as' b'Discovered' b'lbw' b',' b'club' b',' b'<unk>' b',' b'<unk>' b',' b'or' b'Crac' b"'Malley" b','
b'although' b'with' b'the' b'other' b'victory' b',' b'assumes' b'it' b'.' b'(' b'not' b'containment' b'to' b'a' b'recent' b'problem' b')' b'.' b'His' b'traditional'
b'scheme' b'process' b'is' b'proceeded' b'outdoor' b'in' b'overweight' b'clusters' b';' b'God' b'Davis' b'was' b'interested' b'on' b'her' b'right' b'touring' b',' b'although' b'they'
b'had' b'previously' b'previously' b'risen' b'near' b'eclipse' b'in' b'his' b'work' b'by' b'the' b'latter' b'@-@' b'perspective' b'.' b'During' b'the' b'release' b'of' b'Bell'
b',' b'the' b'first' b'promotional' b'mention' b'included' b'a' b'Magnetic' b'seam' b'was' b'put' b'into' b'Shakespeare' b"'s" b'Special' b'Company' b'is' b'katra' b'than' b'chops'
b'@-@' b'up' b'history' b'for' b'frets' b'of' b'actions' b'.' b'<eos>' b'Until' b'arrival' b',' b'Griffin' b'wrote' b'that' b'a' b'"' b'sense' b'"' b'included'
b'especially' b'declining' b'individual' b'forces' b',' b'though' b'are' b'stronger' b'<unk>' b'.' b'According' b'to' b'lessen' b'very' b'role' b',' b'Ceres' b'believed' b'he' b'each'
b'conflicted' b'pump' b'fight' b'follows' b'the' b'malignant' b'polynomial' b'to' b'make' b'Albani' b'.' b'The' b'nobility' b'found' b'a' b'spinners' b'from' b'a' b'special' b'to'
b'vertical' b'@-@' b'term' b'crimes' b',' b'and' b'the' b'Neapolitan' b'apparent' b'<unk>' b'show' b'forcing' b'no' b'of' b'the' b'worst' b'traditions' b'of' b'tallest' b'<unk>'
b'teacher' b'+' b'green' b'crushing' b',' b'with' b'4' b'%' b',' b'and' b'560' b'doctrines' b',' b'with' b'other' b'Asian' b'assistance' b'<unk>' b'.' b'The'
b'game' b'is' b'unadorned' b',' b'especially' b'or' b'steadily' b'favoured' b'according' b'to' b'its' b'inside' b',' b'leading' b'to' b'the' b'removal' b'of' b'gauges' b'.'
b'vanishing' b',' b'a' b'jagged' b'race' b'rested' b'with' b'be' b'rich' b'if' b'these' b'legislation' b'remained' b'together' b'.' b'The' b'anthology' b'and' b'initially' b'regularly'
b'Cases' b'Cererian' b'and' b'acknowledge' b'individual' b'being' b'poured' b'with' b'the' b'Chicago' b'melee' b'.' b'Europium' b',' b'<unk>' b',' b'and' b'Lars' b'life' b'for'
b'electron' b'plumage' b',' b'will' b'deprive' b'themselves' b'.' b'The' b'<unk>' b'gryllotalpa' b'behave' b'have' b'Emerald' b'doubt' b'.' b'When' b'limited' b'cubs' b'are' b'rather'
b'attempting' b'to' b'address' b'.' b'Two' b'birds' b'as' b'being' b'also' b'<unk>' b',' b'such' b'as' b'"' b'<unk>' b'"' b',' b'and' b'possessing' b'criminal'
b'spots' b',' b'lambskin' b'ponderosa' b'mosses' b',' b'which' b'might' b'seek' b'to' b'begin' b'less' b'different' b'delineated' b'techniques' b'.' b'Known' b',' b'on' b'the'
b'ground' b',' b'and' b'only' b'cooler' b',' b'first' b'on' b'other' b'females' b'factory' b'in' b'mathematics' b'.' b'Pilgrim' b'alone' b'has' b'a' b'critical' b'substance'
b',' b'probably' b'in' b'line' b'.' b'He' b'used' b'a' b'<unk>' b',' b'with' b'the' b'resin' b'being' b'transported' b'to' b'the' b'12th' b'island' b'during'
b'the' b'year' b'of' b'a' b'mixture' b'show' b'that' b'it' b'is' b'serving' b';' b'they' b'are' b'headed' b'by' b'prone' b'too' b'species' b',' b'rather'
b'than' b'the' b'risk' b'of' b'carbon' b'.' b'In' b'all' b'other' b'typical' b',' b'faith' b'consist' b'of' b'<unk>' b'whereas' b'<unk>' b'when' b'quotes' b'they'
b'Abrams' b'restructuring' b'vessels' b'.' b'It' b'also' b'emerged' b'even' b'when' b'any' b'lack' b'of' b'birds' b'has' b'wide' b'pinkish' b'structures' b',' b'directing' b'a'
b'chelicerae' b'of' b'amputated' b'elementary' b',' b'only' b'they' b'on' b'objects' b'.' b'A' b'female' b'and' b'a' b'female' b'Leisler' b'@-@' b'shaped' b'image' b'for'
b'51' b'@.@' b'5' b'm' b'(' b'5' b'lb' b')' b'Frenchman' b'2' b'at' b'sea' b'times' b'is' b'approximately' b'2' b'years' b'ago' b',' b'particularly'
b'behind' b'reducing' b'Trujillo' b"'s" b'and' b'food' b'specific' b'spores' b'.' b'Males' b'fibrous' b'females' b'can' b'be' b'severely' b'gregarious' b'.' b'The' b'same' b'brood'
b'behind' b'100' b'minutes' b'after' b'it' b'is' b'estimated' b'by' b'damaging' b'the' b'nest' b'base' b',' b'with' b'some' b'other' b'rare' b'birds' b'and' b'behavior'
b',' b'no' b'transport' b'and' b'Duty' b'demand' b'.' b'Two' b'rare' b'chicks' b'have' b'from' b'feed' b'engage' b'to' b'come' b'with' b'some' b'part' b'of'
b'nesting' b'.' b'The' b'1808' b'to' b'be' b'reduced' b'to' b'Scots' b'and' b'fine' b'stones' b'.' b'There' b'they' b'also' b'purple' b'limitations' b'of' b'certain'
b'skin' b'material' b'usually' b'move' b'during' b'somewhat' b'.' b'A' b'mothers' b'of' b'external' b'take' b'from' b'poaching' b',' b'typically' b'have' b'people' b'processes' b'and'
b'toll' b';' b'while' b'bird' b'plumage' b'differs' b'to' b'Fight' b',' b'they' b'may' b'be' b'open' b'after' b'<unk>' b',' b'thus' b'rarely' b'their' b'<unk>'
b'for' b'a' b'emotional' b'circle' b'.' b'Rough' b'Dahlan' b'probably' b'suggested' b'how' b'they' b'impose' b'their' b'cross' b'of' b'relapse' b'where' b'they' b'changed' b'.'
b'They' b'popularisation' b'them' b'of' b'their' b'<unk>' b',' b'charming' b'by' b'limited' b'or' b'Palestinians' b'the' b'<unk>' b'<unk>' b'.' b'Traffic' b'of' b'areas' b'headed'
b',' b'and' b'their' b'push' b'will' b'articulate' b'.' b'<eos>' b'<unk>' b'would' b'be' b'criticized' b'by' b'protein' b'rice' b',' b'particularly' b'often' b'rather' b'of'
b'the' b'cellular' b'extent' b'.' b'They' b'could' b'overlap' b'forward' b',' b'and' b'there' b'are' b'no' b'governing' b'land' b',' b'they' b'do' b'not' b'find'
b'it' b'.' b'In' b'one' b'place' b',' b'reddish' b'kakapo' b'(' b'kakapo' b'<unk>' b')' b'might' b'be' b'performed' b'that' b'conduct' b',' b'stadia' b','
b'gene' b'or' b'air' b',' b'noise' b',' b'and' b'offensive' b'or' b'skin' b',' b'which' b'may' b'be' b'commercially' b'organized' b'strong' b'method' b'.' b'In'
b'changing' b',' b'Chen' b'and' b'eukaryotes' b'were' b'Membrane' b'spiders' b'in' b'larger' b'growth' b',' b'by' b'some' b'regions' b'.' b'If' b'up' b'about' b'5'
b'%' b'of' b'the' b'males' b',' b'there' b'are' b'displays' b'that' b'shift' b'the' b'bird' b'inclination' b'after' b'supreme' b'<unk>' b'to' b'move' b'outside' b'tests'
b'.' b'The' b'aim' b'of' b'Mouquet' b'Sites' b'is' b'faster' b'as' b'an' b'easy' b'asteroid' b',' b'with' b'ocean' b'or' b'grey' b',' b'albeit' b','
b'as' b'they' b'they' b'CBs' b',' b'and' b'do' b'not' b'be' b'performed' b',' b'greatly' b'on' b'other' b'insects' b',' b'they' b'can' b'write' b'chromosomes'
b',' b'and' b'planners' b',' b'galericulata' b'should' b'be' b'a' b'bird' b'.' b'Also' b'on' b'a' b'holodeck' b'they' b'were' b'divine' b'out' b'of' b'bare'
b'handwriting' b'.' b'Unlike' b'this' b',' b'they' b'makes' b'only' b'anything' b'a' b'variation' b'of' b'skin' b'skeletons' b'further' b'.' b'They' b'have' b'to' b'be'
b'able' b'under' b'their' b'herding' b'tree' b',' b'or' b'dart' b'.' b'When' b'many' b'hypothesis' b'(' b'plant' b',' b'they' b'were' b'@-@' b'looped' b'aged'
b'play' b')' b'is' b'very' b'clear' b'as' b'very' b'on' b'comparison' b'.' b'<eos>' b'Furthermore' b',' b'Wikimania' b'decorations' b'@-@' b'sponsored' b'naming' b'hydrogen' b'when'
b'the' b'kakapo' b'commenced' b',' b'they' b'are' b'slowly' b'on' b'heavy' b'isolation' b'.' b'Sometimes' b'that' b'Larssen' b'leave' b'gently' b',' b'they' b'usually' b'made'
b'short' b'care' b'of' b'feral' b'or' b'any' b'dual' b'species' b'.' b'<eos>' b'Further' b'males' b'that' b'outfitting' b',' b'when' b'there' b'are' b'two' b'envelope'
b'shorter' b'flocks' b'to' b'be' b'males' b'ideally' b'they' b'are' b'highly' b'emission' b'.' b'<eos>' b'As' b'of' b'danger' b',' b'taking' b'in' b'one' b'of'
b'the' b'other' b'surviving' b'structure' b'of' b'Ceres' b'can' b'be' b'rebuffed' b'to' b'be' b'caused' b'by' b'any' b'combination' b'of' b'food' b'or' b'modified' b'its'
It's no GPT-2, but it looks like the model has started to learn the structure of the language!
We're almost ready to demonstrate dynamic quantization. We just need to define a few more helper functions:
bptt = 25
criterion = nn.CrossEntropyLoss()
eval_batch_size = 1
## create test data set
def batchify(data, bsz):
    # Work out how cleanly we can divide the dataset into bsz parts.
    nbatch = data.size(0) // bsz
    # Trim off any extra elements that wouldn't cleanly fit (remainders).
    data = data.narrow(0, 0, nbatch * bsz)
    # Evenly divide the data across the bsz batches.
    return data.view(bsz, -1).t().contiguous()

test_data = batchify(corpus.test, eval_batch_size)

## Evaluation functions
def get_batch(source, i):
    seq_len = min(bptt, len(source) - 1 - i)
    data = source[i:i+seq_len]
    target = source[i+1:i+1+seq_len].view(-1)
    return data, target

def repackage_hidden(h):
    """Wraps hidden states in new Tensors, to detach them from their history."""
    if isinstance(h, torch.Tensor):
        return h.detach()
    else:
        return tuple(repackage_hidden(v) for v in h)

def evaluate(model_, data_source):
    # Turn on evaluation mode which disables dropout.
    model_.eval()
    total_loss = 0.
    hidden = model_.init_hidden(eval_batch_size)
    with torch.no_grad():
        for i in range(0, data_source.size(0) - 1, bptt):
            data, targets = get_batch(data_source, i)
            output, hidden = model_(data, hidden)
            hidden = repackage_hidden(hidden)
            output_flat = output.view(-1, ntokens)
            total_loss += len(data) * criterion(output_flat, targets).item()
    return total_loss / (len(data_source) - 1)
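To make the reshaping in batchify concrete, here is a small illustrative example (not part of the original tutorial): ten consecutive tokens split into two columns, where each column is then read top to bottom as an independent stream.

# Illustration only.
toy = torch.arange(10)
print(batchify(toy, 2))
# tensor([[0, 5],
#         [1, 6],
#         [2, 7],
#         [3, 8],
#         [4, 9]])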
Finally, we can call torch.quantization.quantize_dynamic on the model! Specifically, we specify that we want the nn.LSTM and nn.Linear modules in the model to be quantized, and that we want their weights converted to int8 values.

import torch.quantization
quantized_model = torch.quantization.quantize_dynamic(
    model, {nn.LSTM, nn.Linear}, dtype=torch.qint8
)
print(quantized_model)
Out:
LSTMModel(
(drop): Dropout(p=0.5, inplace=False)
(encoder): Embedding(33278, 512)
(rnn): DynamicQuantizedLSTM(
512, 256, num_layers=5, dropout=0.5
(_all_weight_values): ModuleList(
(0): PackedParameter()
(1): PackedParameter()
(2): PackedParameter()
(3): PackedParameter()
(4): PackedParameter()
(5): PackedParameter()
(6): PackedParameter()
(7): PackedParameter()
(8): PackedParameter()
(9): PackedParameter()
)
)
(decoder): DynamicQuantizedLinear(
in_features=256, out_features=33278
(_packed_params): LinearPackedParams()
)
)
The model looks the same; how does this benefit us? First, we see a significant reduction in model size:
def print_size_of_model(model):
    torch.save(model.state_dict(), "temp.p")
    print('Size (MB):', os.path.getsize("temp.p")/1e6)
    os.remove('temp.p')
print_size_of_model(model)
print_size_of_model(quantized_model)
Out:
Size (MB): 113.941574
Size (MB): 76.807204
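The file does not shrink by a full 4x because quantize_dynamic only converts the LSTM and Linear weights to int8; the large Embedding table stays in float32. A rough back-of-the-envelope check (not part of the original tutorial):

# Parameter counts per submodule of the fp32 model (illustration only).
# The encoder Embedding (33278 x 512, roughly 17M params, about 68 MB in fp32)
# is left untouched by dynamic quantization, so it dominates the remaining size.
for name, module in model.named_children():
    n_params = sum(p.numel() for p in module.parameters())
    print('{:10s} {:>12,} params (~{:.1f} MB in fp32)'.format(
        name, n_params, n_params * 4 / 1e6))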
Second, we see faster inference time, with no difference in evaluation loss:
Note: we set the number of threads to one for a single-threaded comparison, since quantized models run single-threaded.
torch.set_num_threads(1)
def time_model_evaluation(model, test_data):
    s = time.time()
    loss = evaluate(model, test_data)
    elapsed = time.time() - s
    print('''loss: {0:.3f}\nelapsed time (seconds): {1:.1f}'''.format(loss, elapsed))
time_model_evaluation(model, test_data)
time_model_evaluation(quantized_model, test_data)
Out:
loss: 5.167
elapsed time (seconds): 233.9
loss: 5.168
elapsed time (seconds): 164.9
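Word-level language models are often compared by perplexity rather than raw cross-entropy loss; as a quick sanity check (not part of the original tutorial), the two losses above correspond to essentially identical perplexities:

import math
print(math.exp(5.167))  # ~175.4 for the fp32 model
print(math.exp(5.168))  # ~175.6 for the quantized model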
Running this locally on a MacBook Pro, inference took about 200 seconds without quantization and only about 100 seconds with it.
Dynamic quantization can be an easy way to reduce model size while having only a limited impact on accuracy.
Thanks for reading! As always, we welcome any feedback, so please create an issue here if you run into any problems.
Total running time of the script: (6 minutes 43.291 seconds)
Download Python source code: dynamic_quantization_tutorial.py
Download Jupyter notebook: dynamic_quantization_tutorial.ipynb