#23 test

Merged
jtc merged 5 commits from lsyzz/ccCAN-SAN:test into master 1 year ago
Changed files:
  1. 14.yaml (+2, -2)
  2. bad_case.json (+1, -1)
  3. data/clean_labels.txt (+63325, -0)
  4. infer/Backbone.py (+20, -3)
  5. infer/__pycache__/Backbone.cpython-39.pyc (BIN)
  6. inference.py (+4, -4)
  7. language_model_eval.csv (+3839, -3923)
  8. language_model_train.csv (+2176, -0)
  9. models/Backbone.py (+2, -2)
  10. models/Hierarchical_attention/__pycache__/decoder.cpython-39.pyc (BIN)
  11. read_labels.py (+12, -6)

14.yaml (+2, -2)

@@ -16,8 +16,8 @@ eps: 1e-6
 weight_decay: 1e-4
 beta: 0.9
 image_resize: True
-image_width: 3200
-image_height: 400
+image_width: 1600
+image_height: 320
 image_channel: 1
 dropout: True
 dropout_ratio: 0.5
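
This halves the model's input canvas from 3200×400 to 1600×320, which matches the 320×1600 figure size hard-coded in the new visualization helper in infer/Backbone.py below. A minimal sketch of consuming these keys, assuming the scripts load the config with PyYAML (the actual loader is not part of this diff):

import yaml

# Assumption: the repo reads 14.yaml with PyYAML; only the keys below
# are confirmed by the diff.
with open('14.yaml') as f:
    params = yaml.safe_load(f)

if params['image_resize']:
    # After this PR: 1600 x 320 instead of 3200 x 400.
    target_w = params['image_width']
    target_h = params['image_height']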


bad_case.json (+1, -1)

File diff suppressed because it is too large


data/clean_labels.txt (+63325, -0)

File diff suppressed because it is too large


infer/Backbone.py (+20, -3)

@@ -3,8 +3,8 @@ import models
 from infer.san_decoder import SAN_decoder
 import torch.nn.functional as F
 from models.CAN.counting import CountingDecoder as counting_decoder
+import matplotlib.pyplot as plt
+import numpy as np
 class Backbone(nn.Module):
     def __init__(self, params=None):
         super(Backbone, self).__init__()
@@ -22,9 +22,10 @@ class Backbone(nn.Module):
         self.ratio = params['densenet']['ratio'] if params['encoder']['net'] == 'DenseNet' else 16 * params['resnet'][
             'conv1_stride']

-    def forward(self, images, images_mask):
+    def forward(self, images, images_mask, name):
         counting_mask = images_mask[:, :, ::self.ratio, ::self.ratio]
         cnn_features = self.encoder(images)
+        # visulize_all_channel_into_one(cnn_features, name)
         counting_preds1, _ = self.counting_decoder1(cnn_features, counting_mask)
         counting_preds2, _ = self.counting_decoder2(cnn_features, counting_mask)
         counting_preds = (counting_preds1 + counting_preds2) / 2
@@ -32,6 +33,22 @@ class Backbone(nn.Module):
         return prediction


+def visulize_all_channel_into_one(feature_map, i):
+    output = feature_map
+
+    output = output.data.squeeze()
+    output = output.cpu().numpy()
+
+    output = np.mean(output, axis=0)
+
+    height, width = 320, 1600
+    times = height / float(width)
+    plt.rcParams["figure.figsize"] = (1, times)
+    plt.axis('off')
+    plt.imshow(output, cmap='jet', interpolation='bilinear')
+    plt.savefig('vis/{}.png'.format(i), dpi=3 * height)
+
 class SupConHead(nn.Module):
     """backbone + projection head"""



infer/__pycache__/Backbone.cpython-39.pyc (BIN)


inference.py (+4, -4)

@@ -15,8 +15,8 @@ from utils import compute_edit_distance

 parser = argparse.ArgumentParser(description='Spatial channel attention')
 parser.add_argument('--config', default='14.yaml', type=str, help='config file path')
-parser.add_argument('--image_path', default='data/CROHME/19_test_images.pkl', type=str, help='test image path')
-parser.add_argument('--label_path', default='data/CROHME/19_test_labels.txt', type=str, help='test label path')
+parser.add_argument('--image_path', default='data/CROHME/14_test_images.pkl', type=str, help='test image path')
+parser.add_argument('--label_path', default='data/CROHME/14_test_labels.txt', type=str, help='test label path')
 args = parser.parse_args()

 if not args.config:
@@ -108,8 +108,8 @@ with torch.no_grad():
     image_mask = torch.ones(image.shape)
     image, image_mask = image.to(device), image_mask.to(device)

-    prediction = model(image, image_mask)
+    prediction = model(image, image_mask, name)
     print(prediction)
     latex_list = convert(1, prediction)
     latex_string = ' '.join(latex_list)
     if latex_string == label.strip():
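
The second hunk threads the sample name into model(...) so the (commented-out) visualization in infer/Backbone.py can tag its output. For context, a hedged sketch of the surrounding eval-loop plumbing, assuming the .pkl maps sample names to image arrays and the label file is whitespace-separated (neither is shown in this diff):

import pickle

# Assumption: the pickle stores {name: image array}; only the file paths
# are confirmed by the diff.
with open('data/CROHME/14_test_images.pkl', 'rb') as f:
    images = pickle.load(f)

with open('data/CROHME/14_test_labels.txt') as f:
    for line in f:
        name, *words = line.split()
        label = ' '.join(words)
        # image = images[name]; then build image_mask and call
        # model(image, image_mask, name) as in the hunk above.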


language_model_eval.csv (+3839, -3923)

File diff suppressed because it is too large


language_model_train.csv (+2176, -0)

File diff suppressed because it is too large


models/Backbone.py (+2, -2)

@@ -41,7 +41,7 @@ class Backbone(nn.Module):
         counting_loss = self.counting_loss(counting_preds1, counting_labels) + self.counting_loss(counting_preds2,
                                                                                                    counting_labels) \
                         + self.counting_loss(counting_preds, counting_labels)
+        counting_loss = 0.1 * counting_loss
         word_probs, struct_probs, words_alphas, struct_alphas, c2p_probs, c2p_alphas, word_states, c2p_out_states = self.decoder(
             cnn_features, labels, images_mask, labels_mask, counting_preds, is_train=is_train)

@@ -239,7 +239,7 @@ class SupConLoss(nn.Module):
         loss = - (self.temperature / self.base_temperature) * mean_log_prob_pos
         loss = loss.view(anchor_count, batch_size).mean()

-        loss = 0.1 * loss
+        loss = 0.05 * loss

         return loss
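
Both hunks tune loss weights: the counting loss is scaled by 0.1, and SupConLoss's output drops from 0.1x to 0.05x. Purely illustrative arithmetic of the re-weighting; how the terms are actually combined is not shown in this diff and is an assumption:

import torch

# Dummy magnitudes; in the repo these come from the decoder, the counting
# decoder, and SupConLoss respectively.
word_loss = torch.tensor(1.20)
counting_loss = torch.tensor(0.80)
supcon_loss = torch.tensor(0.50)

# Assumed combination: only the 0.1 and 0.05 factors are confirmed above.
total = word_loss + 0.1 * counting_loss + 0.05 * supcon_loss  # 1.305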



models/Hierarchical_attention/__pycache__/decoder.cpython-39.pyc (BIN)


read_labels.py (+12, -6)

@@ -13,14 +13,18 @@ label_path = './data/train_caption.txt'
 # for item in labels:
 #     name, *labels = item.split()
 #     label = ' '.join(labels)
-#     if len(labels) > 15:
+#     if len(labels) > 25:
 #         continue
 #     if 'limits' in label or len(labels) == 0:
 #         continue
 #     inp.append(label)
 #     gt.append(label)
 #
 # train_voc = 'language_model_train.csv'
 # pd.DataFrame({'inp':inp, 'gt':gt}).to_csv(train_voc, index=None, sep='\t')
 #

 # generate the test set
 word_path = './data/word.txt'
 new_word_path = './data/word1.txt'
 with open(word_path) as f:
@@ -65,16 +69,18 @@ inp,gt = [], []
 c = 0
 for item in f:
     name, *label = item.split()
-    if len(label) > 15:
+    if len(label) > 25:
         continue
     label1 = ' '.join(disturb(label, 1, labels))
     label2 = ' '.join(label)
     if 'limits' in label1:
         continue
     inp.append(' '.join(disturb(label, 1, labels)))
     gt.append(' '.join(label))
     if label1 == label2:
         c = c + 1

-# eval_voc = 'language_model_eval.csv'
-# pd.DataFrame({'inp':inp, 'gt':gt}).to_csv(eval_voc, index=None, sep='\t')
+inp = inp[:5000]
+gt = gt[:5000]
+eval_voc = 'language_model_eval.csv'
+pd.DataFrame({'inp':inp, 'gt':gt}).to_csv(eval_voc, index=None, sep='\t')
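
disturb is defined earlier in read_labels.py and is not part of this diff; the loop calls it as disturb(label, 1, labels) to pair a noisy input with the clean ground truth. A hypothetical stand-in matching only that call shape, so the loop above reads self-contained (the real perturbation logic may differ):

import random

def disturb(tokens, n, vocab):
    # Hypothetical: replace n random positions with a random token drawn
    # from vocab. Matches the call signature seen above, not necessarily
    # the repo's actual implementation.
    out = list(tokens)
    if not out:
        return out
    for _ in range(n):
        i = random.randrange(len(out))
        out[i] = random.choice(vocab)
    return out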
