ppocr.PPRecognizer 类
Classes
PPRecognizer (Recognizer)
Methods
__init__(self, model_name='ch_PP-OCRv3', *, cand_alphabet=None, model_fp=None, root='/home/docs/.cnocr', rec_image_shape='3, 32, 320', use_space_char=True, **kwargs)
special
来自 ppocr 的文本识别器。
Parameters:
Name | Type | Description | Default |
---|---|---|---|
model_name |
str |
模型名称。默认为 |
'ch_PP-OCRv3' |
cand_alphabet |
Optional[Union[Collection, str]] |
待识别字符所在的候选集合。默认为 |
None |
model_fp |
Optional[str] |
如果不使用系统自带的模型,可以通过此参数直接指定所使用的模型文件('.ckpt' 文件) |
None |
root |
Union[str, Path] |
模型文件所在的根目录
Linux/Mac下默认值为 |
'/home/docs/.cnocr' |
rec_image_shape |
str |
输入图片尺寸,无需更改使用默认值即可。默认值: |
'3, 32, 320' |
use_space_char |
bool |
是否使用空格字符,无需更改使用默认值即可。默认值: |
True |
**kwargs |
目前未被使用。 |
{} |
Source code in cnocr/ppocr/pp_recognizer.py
def __init__(
self,
model_name: str = 'ch_PP-OCRv3',
*,
cand_alphabet: Optional[Union[Collection, str]] = None,
model_fp: Optional[str] = None,
root: Union[str, Path] = data_dir(),
rec_image_shape: str = "3, 32, 320",
use_space_char: bool = True,
**kwargs
):
"""
来自 ppocr 的文本识别器。
Args:
model_name (str): 模型名称。默认为 `ch_PP-OCRv3`
cand_alphabet (Optional[Union[Collection, str]]): 待识别字符所在的候选集合。默认为 `None`,表示不限定识别字符范围
model_fp (Optional[str]): 如果不使用系统自带的模型,可以通过此参数直接指定所使用的模型文件('.ckpt' 文件)
root (Union[str, Path]): 模型文件所在的根目录
Linux/Mac下默认值为 `~/.cnocr`,表示模型文件所处文件夹类似 `~/.cnocr/2.1/densenet_lite_136-fc`
Windows下默认值为 `C:/Users/<username>/AppData/Roaming/cnocr`
rec_image_shape (str): 输入图片尺寸,无需更改使用默认值即可。默认值:`"3, 32, 320"`
use_space_char (bool): 是否使用空格字符,无需更改使用默认值即可。默认值:`True`
**kwargs: 目前未被使用。
"""
self.rec_image_shape = [int(v) for v in rec_image_shape.split(",")]
self.rec_algorithm = 'CRNN'
self._model_name = model_name
self._model_backend = 'onnx'
vocab_fp = AVAILABLE_MODELS.get_vocab_fp(self._model_name, self._model_backend)
self._assert_and_prepare_model_files(model_fp, root)
postprocess_params = {
'name': 'CTCLabelDecode',
'character_dict_path': vocab_fp,
'use_space_char': use_space_char,
'cand_alphabet': cand_alphabet,
}
self.postprocess_op = build_post_process(postprocess_params)
(
self.predictor,
self.input_tensor,
self.output_tensors,
self.config,
) = create_predictor(self._model_fp, 'rec', logger)
self.use_onnx = True
recognize(self, img_list, batch_size=1)
Batch recognize characters from a list of one-line-characters images.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
img_list |
List[Union[str, Path, np.ndarray]] |
list of images, in which each element should be a line image array with np.ndarray. Each element should be a tensor with values ranging from 0 to 255, and with shape [height, width] or [height, width, channel]. The optional channel should be 1 (gray image) or 3 (RGB-format color image). 注:img_list 不宜包含太多图片,否则同时导入这些图片会消耗很多内存。 |
required |
batch_size |
int |
待处理图片很多时,需要分批处理,每批图片的数量由此参数指定。默认为 |
1 |
Returns:
Type | Description |
---|---|
list |
list of (chars, prob), such as [('第一行', 0.80), ('第二行', 0.75), ('第三行', 0.9)] |
Source code in cnocr/ppocr/pp_recognizer.py
def recognize(
self, img_list: List[Union[str, Path, np.ndarray]], batch_size: int = 1,
) -> List[Tuple[str, float]]:
"""
Batch recognize characters from a list of one-line-characters images.
Args:
img_list (List[Union[str, Path, np.ndarray]]):
list of images, in which each element should be a line image array with np.ndarray.
Each element should be a tensor with values ranging from 0 to 255,
and with shape [height, width] or [height, width, channel].
The optional channel should be 1 (gray image) or 3 (RGB-format color image).
注:img_list 不宜包含太多图片,否则同时导入这些图片会消耗很多内存。
batch_size: 待处理图片很多时,需要分批处理,每批图片的数量由此参数指定。默认为 `1`。
Returns:
list: list of (chars, prob), such as
[('第一行', 0.80), ('第二行', 0.75), ('第三行', 0.9)]
"""
if len(img_list) == 0:
return []
img_list = [self._prepare_img(img) for img in img_list]
img_num = len(img_list)
# Calculate the aspect ratio of all text bars
width_list = []
for img in img_list:
width_list.append(img.shape[1] / float(img.shape[0]))
# Sorting can speed up the recognition process
indices = np.argsort(np.array(width_list))
rec_res = [['', 0.0]] * img_num
for beg_img_no in range(0, img_num, batch_size):
end_img_no = min(img_num, beg_img_no + batch_size)
norm_img_batch = []
max_wh_ratio = 0
for ino in range(beg_img_no, end_img_no):
h, w = img_list[indices[ino]].shape[0:2]
wh_ratio = w * 1.0 / h
max_wh_ratio = max(max_wh_ratio, wh_ratio)
for ino in range(beg_img_no, end_img_no):
if self.rec_algorithm != "SRN" and self.rec_algorithm != "SAR":
norm_img = self.resize_norm_img(
img_list[indices[ino]], max_wh_ratio
)
norm_img = norm_img[np.newaxis, :]
norm_img_batch.append(norm_img)
norm_img_batch = np.concatenate(norm_img_batch)
norm_img_batch = norm_img_batch.copy()
input_dict = dict()
input_dict[self.input_tensor.name] = norm_img_batch
outputs = self.predictor.run(self.output_tensors, input_dict)
preds = outputs[0]
rec_result = self.postprocess_op(preds)
for rno in range(len(rec_result)):
rec_res[indices[beg_img_no + rno]] = rec_result[rno]
return rec_res
resize_norm_img(self, img, max_wh_ratio)
Parameters:
Name | Type | Description | Default |
---|---|---|---|
img |
with shape of (height, width, channel) |
required | |
max_wh_ratio |
required |
Source code in cnocr/ppocr/pp_recognizer.py
def resize_norm_img(self, img, max_wh_ratio):
"""
Args:
img (): with shape of (height, width, channel)
max_wh_ratio ():
Returns:
"""
imgC, imgH, imgW = self.rec_image_shape
assert imgC == img.shape[2]
imgW = int((32 * max_wh_ratio))
if self.use_onnx:
w = self.input_tensor.shape[3:][0]
if w is not None and w > 0:
imgW = w
h, w = img.shape[:2]
ratio = w / float(h)
if math.ceil(imgH * ratio) > imgW:
resized_w = imgW
else:
resized_w = int(math.ceil(imgH * ratio))
# resized_image = cv2.resize(img, (resized_w, imgH))
resized_image = resize_img(
img.transpose((2, 0, 1)), target_h_w=(imgH, resized_w), return_torch=False
).transpose((1, 2, 0))
resized_image = resized_image.astype('float32')
resized_image = resized_image.transpose((2, 0, 1)) / 255
resized_image -= 0.5
resized_image /= 0.5
padding_im = np.zeros((imgC, imgH, imgW), dtype=np.float32)
padding_im[:, :, 0:resized_w] = resized_image
return padding_im