Building My First Neural Network with NumPy
Preface
This post implements handwritten digit recognition on MNIST in pure NumPy. It first lays out the overall structure of the network and then walks through the code. This is my first neural network.
Full code: GitHub
Overall network structure:
An input layer, a hidden layer, and an output layer. What we know up front is that the output layer has ten values: the probabilities of the 10 digits.
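With the layer sizes used later in the post (784 inputs, 100 hidden neurons, 10 outputs), that amounts to 784·100 + 100 = 78,500 parameters in the hidden layer and 100·10 + 10 = 1,010 in the output layer, 79,510 trainable parameters in total.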
Training set, validation set, test set
Hand-computed gradient descent
Solving for the three parameters
Detailed hand calculation of the first parameter
Code
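The original post never lists its imports; a minimal set that covers every snippet below (using the usual aliases) would be:

import copy
import math
import struct
from pathlib import Path

import numpy as np
import matplotlib.pyplot as plt
import pylab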
Activation functions and their derivatives
def tanh(x):
    return np.tanh(x)

def bypass(x):
    return x

def softmax(x):
    exp = np.exp(x - x.max())  # subtract the max to avoid overflow in the exponential;
                               # the shift cancels out in the division, so the result is unchanged
    return exp / exp.sum()

def d_softmax(data):
    sm = softmax(data)
    return np.diag(sm) - np.outer(sm, sm)  # full Jacobian matrix

def d_tanh(data):  # returns a vector (the derivative is element-wise)
    return 1 / (np.cosh(data)) ** 2

def d_bypass(x):
    return 1

differential = {softmax: d_softmax, tanh: d_tanh, bypass: d_bypass}
d_type = {bypass: 'times', softmax: 'dot', tanh: 'times'}
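d_type records how each derivative combines with the incoming gradient during back-propagation: d_tanh and d_bypass are element-wise, so they are multiplied ('times'), while d_softmax is a full Jacobian matrix, so it needs a matrix product ('dot'). A quick sanity check of that distinction (my addition, not from the original post):

x = np.random.rand(5)
v = np.random.rand(5)
# tanh's Jacobian is diagonal, so an element-wise product with d_tanh(x)
# equals a matrix product with np.diag(d_tanh(x))
print(np.abs(np.dot(np.diag(d_tanh(x)), v) - d_tanh(x) * v).max())  # ~0
print(softmax(x).sum())    # 1.0 – softmax outputs a probability distribution
print(d_softmax(x).shape)  # (5, 5) – a full Jacobian, hence 'dot'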
Parameter initialization
dimensions = [28 * 28, 100, 10]  # 784 input neurons, a 100-neuron hidden layer, 10 output digits
# the 28*28 inputs connect to the 100 hidden neurons, which connect to the 10 outputs
activation = [bypass, tanh, softmax]  # bypass for the input layer, then one activation per layer
distribution = [  # initialization ranges
    {},  # the input layer has no parameters
    {'b': [0, 0], 'w': [-math.sqrt(6 / (dimensions[0] + dimensions[1])), math.sqrt(6 / (dimensions[0] + dimensions[1]))]},
    {'b': [0, 0], 'w': [-math.sqrt(6 / (dimensions[1] + dimensions[2])), math.sqrt(6 / (dimensions[1] + dimensions[2]))]},
]
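These bounds are the Xavier (Glorot) uniform-initialization range ±sqrt(6/(n_in+n_out)); the biases start at zero because their range is [0, 0]. For the first weight matrix the bound works out to:

print(math.sqrt(6 / (784 + 100)))  # ≈ 0.0824, so w is drawn uniformly from about [-0.082, 0.082]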
Parameter initialization functions
def init_parameters_b(layer):  # initialize b for one layer
    dist = distribution[layer]['b']
    return np.random.rand(dimensions[layer]) * (dist[1] - dist[0]) + dist[0]

def init_parameters_w(layer):  # initialize w for one layer
    dist = distribution[layer]['w']
    return np.random.rand(dimensions[layer - 1], dimensions[layer]) * (dist[1] - dist[0]) + dist[0]

def init_parameters():
    parameter = []  # collects the parameters of every layer
    for i in range(len(distribution)):
        layer_parameter = {}  # parameters of the current layer
        for j in distribution[i].keys():
            if j == 'b':
                layer_parameter['b'] = init_parameters_b(i)
                continue
            if j == 'w':
                layer_parameter['w'] = init_parameters_w(i)
                continue
        parameter.append(layer_parameter)
    return parameter
The initial (untrained) parameters
parameters = init_parameters()
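A quick shape check (my addition) confirms the parameters line up with dimensions:

print(parameters[0])             # {} – the input layer has no parameters
print(parameters[1]['w'].shape)  # (784, 100)
print(parameters[1]['b'].shape)  # (100,)
print(parameters[2]['w'].shape)  # (100, 10)
print(parameters[2]['b'].shape)  # (10,)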
Prediction function
def predict(img, parameters):
    # arguments: an image and the parameter list
    l_in = img
    l_out = activation[0](l_in)  # the input layer just passes the image through (bypass)
    for layer in range(1, len(dimensions)):
        l_in = np.dot(l_out, parameters[layer]['w']) + parameters[layer]['b']  # affine step, layer by layer
        l_out = activation[layer](l_in)
    return l_out
First prediction (the model is freshly initialized, so the output is essentially random)
predict(train_img[0], init_parameters())
# Output:
array([0.07210171, 0.07957606, 0.13152407, 0.05420442, 0.08498909,
       0.12788144, 0.14911174, 0.14570486, 0.08225591, 0.07265069])
Training, validation, and test sets
dataset_path = Path('D:/Desktop/MNIST')
train_img_path = dataset_path/'train-images-idx3-ubyte/train-images.idx3-ubyte'
train_lab_path = dataset_path/'train-labels-idx1-ubyte/train-labels.idx1-ubyte'
test_img_path = dataset_path/'t10k-images-idx3-ubyte/t10k-images.idx3-ubyte'
test_lab_path = dataset_path/'t10k-labels-idx1-ubyte/t10k-labels.idx1-ubyte'
Splitting the sets
train_num = 50000  # training
valid_num = 10000  # validation
test_num = 10000   # test
with open(train_img_path, 'rb') as f:
    struct.unpack('>4i', f.read(16))  # skip the 16-byte IDX header
    temp_img = np.fromfile(f, dtype=np.uint8).reshape(-1, 28 * 28) / 255
    train_img = temp_img[:train_num]  # carve 10k of the 60k training images out for validation
    valid_img = temp_img[train_num:]
with open(test_img_path, 'rb') as f:
    struct.unpack('>4i', f.read(16))
    test_img = np.fromfile(f, dtype=np.uint8).reshape(-1, 28 * 28) / 255
with open(train_lab_path, 'rb') as f:
    struct.unpack('>2i', f.read(8))  # skip the 8-byte IDX header
    temp_lab = np.fromfile(f, dtype=np.uint8)
    train_lab = temp_lab[:train_num]
    valid_lab = temp_lab[train_num:]
with open(test_lab_path, 'rb') as f:
    struct.unpack('>2i', f.read(8))
    test_lab = np.fromfile(f, dtype=np.uint8)
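MNIST ships 60,000 training and 10,000 test images, so after the split the arrays should look like this (a sanity check of mine):

print(train_img.shape, train_lab.shape)  # (50000, 784) (50000,)
print(valid_img.shape, valid_lab.shape)  # (10000, 784) (10000,)
print(test_img.shape, test_lab.shape)    # (10000, 784) (10000,)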
Displaying an image and its label
def show_train(index):
    plt.imshow(train_img[index].reshape(28, 28), cmap='gray')
    pylab.show()
    print('label:{}'.format(train_lab[index]))

def show_valid(index):
    plt.imshow(valid_img[index].reshape(28, 28), cmap='gray')
    pylab.show()
    print('label:{}'.format(valid_lab[index]))

def show_test(index):
    plt.imshow(test_img[index].reshape(28, 28), cmap='gray')
    pylab.show()
    print('test:{}'.format(test_lab[index]))
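For example, show_train(0) should display the first training digit and print label:5, since the first image in the MNIST training set is a handwritten 5.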
Predicting on random noise
predict(np.random.rand(784), parameters)
# Output:
array([0.0942381 , 0.11644771, 0.05850607, 0.23711087, 0.02732923,
       0.0176975 , 0.19317991, 0.14196864, 0.08510021, 0.02842176])
Verify the derivatives numerically against the definition of the derivative; if a derivative is wrong, everything that follows is wasted effort.
h = 0.0001
func = softmax
input_len = 4
for i in range(input_len):
    # compare the analytic derivative with the finite-difference approximation
    test_input = np.random.rand(input_len)
    derivative = differential[func](test_input)
    value1 = func(test_input)
    test_input[i] += h
    value2 = func(test_input)
    print(derivative[i] - (value2 - value1) / h)  # the difference should be close to zero

onehot = np.identity(dimensions[-1])  # one-hot encodings of the 10 digits
Gradient descent: here we back-propagate (in essence, chain-rule differentiation) to make the loss function as small as possible, i.e. to bring the prediction as close as possible to the true label. Compute the gradient, take one update step on the parameters, and repeat.
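Concretely, the squared loss used below is

    L = || onehot(lab) - y_pred ||^2

so its gradient with respect to the network output is dL/dy_pred = -2 * (onehot(lab) - y_pred), which is exactly the d_layer that starts the backward pass in grad_parameters.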
def sqr_loss(img, lab, parameters):
    y_pred = predict(img, parameters)
    y = onehot[lab]
    diff = y - y_pred
    return np.dot(diff, diff)  # squared Euclidean distance

def grad_parameters(img, lab, parameters):
    # arguments: an image, its label, and the parameter list
    l_in_list = [img]  # the input layer has no parameters, so its l_in is just the image
    l_out_list = [activation[0](l_in_list[0])]  # bypass, so its l_out is the image as well
    for layer in range(1, len(dimensions)):
        # forward pass, same as predict(), but keeping every intermediate value
        l_in = np.dot(l_out_list[layer - 1], parameters[layer]['w']) + parameters[layer]['b']
        l_out = activation[layer](l_in)
        l_in_list.append(l_in)
        l_out_list.append(l_out)
    d_layer = -2 * (onehot[lab] - l_out_list[-1])  # dL/dy_pred
    grad_result = [None] * len(dimensions)
    for layer in range(len(dimensions) - 1, 0, -1):  # backward pass
        if d_type[activation[layer]] == 'times':
            d_layer = differential[activation[layer]](l_in_list[layer]) * d_layer
        if d_type[activation[layer]] == 'dot':
            d_layer = np.dot(differential[activation[layer]](l_in_list[layer]), d_layer)
        grad_result[layer] = {}
        grad_result[layer]['b'] = d_layer
        grad_result[layer]['w'] = np.outer(l_out_list[layer - 1], d_layer)  # outer product with the previous layer's output
        d_layer = np.dot(parameters[layer]['w'], d_layer)  # push the gradient back to the previous layer
    return grad_result
Gradients produced by back-propagation
grad_parameters(train_img[0], train_lab[0], init_parameters())
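The returned gradients mirror the parameter shapes, which is what the update step later relies on; a quick check (my addition):

grad = grad_parameters(train_img[0], train_lab[0], init_parameters())
print(grad[1]['w'].shape, grad[1]['b'].shape)  # (784, 100) (100,)
print(grad[2]['w'].shape, grad[2]['b'].shape)  # (100, 10) (10,)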
Checking that the back-propagated partial derivatives are correct
# check parameter b
h = 0.00001  # finite-difference step for checking the back-propagated derivatives
layer = 2
parameters = init_parameters()
pname = 'b'
for i in range(len(parameters[layer][pname])):
    # compare the analytic derivative with the finite-difference approximation
    img_i = np.random.randint(train_num)  # pick a random training image
    test_parameters = init_parameters()   # random parameters
    derivative = grad_parameters(train_img[img_i], train_lab[img_i], test_parameters)[layer][pname]
    value1 = sqr_loss(train_img[img_i], train_lab[img_i], test_parameters)
    test_parameters[layer][pname][i] += h
    value2 = sqr_loss(train_img[img_i], train_lab[img_i], test_parameters)
    print(derivative[i] - (value2 - value1) / h)  # should be close to zero
# check parameter w
h = 0.00001
layer = 1
parameters = init_parameters()
pname = 'w'
grad_list = []
for i in range(len(parameters[layer][pname])):
    for j in range(len(parameters[layer][pname][0])):
        img_i = np.random.randint(train_num)  # pick a random training image
        test_parameters = init_parameters()   # random parameters
        derivative = grad_parameters(train_img[img_i], train_lab[img_i], test_parameters)[layer][pname]
        value1 = sqr_loss(train_img[img_i], train_lab[img_i], test_parameters)
        test_parameters[layer][pname][i][j] += h
        value2 = sqr_loss(train_img[img_i], train_lab[img_i], test_parameters)
        grad_list.append(derivative[i][j] - (value2 - value1) / h)
np.abs(grad_list).max()  # the largest error; it should be tiny
Loss and accuracy functions
def valid_loss(parameters):  # loss on the validation set
    loss_accu = 0
    for img_i in range(valid_num):
        loss_accu += sqr_loss(valid_img[img_i], valid_lab[img_i], parameters)
    return loss_accu / (valid_num / 10000)  # normalize to the loss per 10,000 images,
                                            # so training and validation losses are comparable

def valid_accuracy(parameters):  # accuracy on the validation set
    correct = [predict(valid_img[img_i], parameters).argmax() == valid_lab[img_i] for img_i in range(valid_num)]
    return correct.count(True) / len(correct)

def train_loss(parameters):  # loss on the training set
    loss_accu = 0
    for img_i in range(train_num):
        loss_accu += sqr_loss(train_img[img_i], train_lab[img_i], parameters)
    return loss_accu / (train_num / 10000)

def train_accuracy(parameters):  # accuracy on the training set
    correct = [predict(train_img[img_i], parameters).argmax() == train_lab[img_i] for img_i in range(train_num)]
    return correct.count(True) / len(correct)

def test_accuracy(parameters):  # accuracy on the test set
    correct = [predict(test_img[img_i], parameters).argmax() == test_lab[img_i] for img_i in range(test_num)]
    return correct.count(True) / len(correct)
def grad_add(grad1, grad2):  # accumulate grad2 into grad1, layer by layer
    for layer in range(1, len(grad1)):
        for pname in grad1[layer].keys():
            grad1[layer][pname] += grad2[layer][pname]
    return grad1

def grad_divide(grad, denominator):  # divide every gradient by a scalar (for averaging)
    for layer in range(1, len(grad)):
        for pname in grad[layer].keys():
            grad[layer][pname] /= denominator
    return grad

def combine_parameters(parameters, grad, learn_rate):  # build the updated parameters
    parameter_tmp = copy.deepcopy(parameters)
    for layer in range(len(parameter_tmp)):
        for pname in parameter_tmp[layer].keys():
            parameter_tmp[layer][pname] -= learn_rate * grad[layer][pname]
    return parameter_tmp
Mini-batches
batch_size = 100  # treat 100 images as one mini-batch
# train on one mini-batch at a time
def train_batch(current_batch, parameters):  # average the gradients over the 100 images of this batch
    grad_accu = grad_parameters(train_img[current_batch * batch_size], train_lab[current_batch * batch_size],
                                parameters)
    for img_i in range(1, batch_size):
        grad_temp = grad_parameters(train_img[current_batch * batch_size + img_i],
                                    train_lab[current_batch * batch_size + img_i], parameters)
        grad_add(grad_accu, grad_temp)  # accumulate the gradients
    grad_divide(grad_accu, batch_size)  # average them to get the descent direction
    return grad_accu
parameters = init_parameters()
Training loop
from tqdm import tqdm_notebook
current_epoch = 0
train_loss_list = []  # training loss per epoch
valid_loss_list = []  # validation loss per epoch
train_accu_list = []  # training accuracy per epoch
valid_accu_list = []  # validation accuracy per epoch
learn_rate = 10**-0.3  # learning rate; it should be lowered towards the end of training
epoch_num = 5  # number of epochs; one full pass over the training set is one epoch
for epoch in tqdm_notebook(range(epoch_num)):
    for i in range(train_num // batch_size):
        # if i % 100 == 99:
        #     print('running batch{}/{}'.format(i + 1, train_num // batch_size))
        grad_tmp = train_batch(i, parameters)
        parameters = combine_parameters(parameters, grad_tmp, learn_rate)
    current_epoch += 1
    train_loss_list.append(train_loss(parameters))
    train_accu_list.append(train_accuracy(parameters))
    valid_loss_list.append(valid_loss(parameters))
    valid_accu_list.append(valid_accuracy(parameters))

valid_accuracy(parameters)
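After training, the held-out test set can be scored with the test_accuracy helper defined earlier (the original post stops at the validation accuracy):

test_accuracy(parameters)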
Training and validation loss
lower = 0  # first epoch to show
plt.plot(valid_loss_list[lower:], color='black', label='validation loss')
plt.plot(train_loss_list[lower:], color='red', label='train loss')
plt.legend()
plt.show()
Training and validation accuracy
plt.plot(valid_accu_list[lower:], color='black', label='validation accuracy')
plt.plot(train_accu_list[lower:], color='red', label='train accuracy')
plt.legend()
plt.show()