# Views: 177 (leftover web-page text, not part of the script)
#git clone https://github.com/fcakyon/craft-text-detector
#fix craft before installing it if your opencv version is higher than 4.5
#do not install requirements.txt as it installs torch without cuda
#https://github.com/clovaai/CRAFT-pytorch
#fix basenet/vgg16_bn.py in git cloned dir
##comment out the line "from torchvision.models.vgg import model_urls"
##
##
##Insert all this:
##
##__all__ = [
##'VGG', 'vgg11', 'vgg11_bn', 'vgg13', 'vgg13_bn', 'vgg16', 'vgg16_bn',
##'vgg19_bn', 'vgg19',
##]
##
##model_urls = {
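##'vgg11': 'https://download.pytorch.org/models/vgg11-bbd30ac9.pth',
##'vgg13': 'https://download.pytorch.org/models/vgg13-c768596a.pth',
##'vgg16': 'https://download.pytorch.org/models/vgg16-397923af.pth',
##'vgg19': 'https://download.pytorch.org/models/vgg19-dcbb9e9f.pth',
##'vgg11_bn': 'https://download.pytorch.org/models/vgg11_bn-6002323d.pth',
##'vgg13_bn': 'https://download.pytorch.org/models/vgg13_bn-abd245e5.pth',
##'vgg16_bn': 'https://download.pytorch.org/models/vgg16_bn-6c64b313.pth',
##'vgg19_bn': 'https://download.pytorch.org/models/vgg19_bn-c79401a0.pth',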
##}
#cd craft-text-detector
#install craft: python setup.py install
#download weights
#C:\Users\{user}\.craft_text_detector\weights from huggingface.co/boomb0om/CRAFT...
#craft_mlt_25k.pth
#craft_refiner_CTW1500.pth
#fix error "ValueError: setting an array element with a sequence."
#find craft_utils.py in the git-cloned craft-text-detector
#nano craft_utils.py
#change polys = np.array(polys) to polys = np.array(polys, dtype=object)
#in predict.py:
#change polys_as_ratio = np.array(polys_as_ratio) to polys_as_ratio = np.array(polys_as_ratio, dtype=object)
import warnings
warnings.filterwarnings('ignore')
from craft_text_detector import (
read_image,
load_craftnet_model,
load_refinenet_model,
get_prediction,
export_detected_regions,
export_extra_results,
empty_cuda_cache
)
# Text-detection demo script: reads one image, runs the craft_text_detector
# prediction with the refiner network, exports the detected regions and the
# extra visualisations to `output_dir`, then clears the CUDA cache.
# All models and the prediction are requested with cuda=True.

# set image path and export folder directory
image = '1.png'  # can be filepath, PIL image or numpy array
output_dir = 'outputs/'

# read image
image = read_image(image)

# load models
refine_net = load_refinenet_model(cuda=True)
craft_net = load_craftnet_model(cuda=True)

# perform prediction
prediction_result = get_prediction(
    image=image,
    craft_net=craft_net,
    refine_net=refine_net,
    text_threshold=0.7,
    link_threshold=0.4,
    low_text=0.4,
    cuda=True,
    long_size=1280,
)

# export detected text regions
exported_file_paths = export_detected_regions(
    image=image,
    regions=prediction_result["boxes"],
    output_dir=output_dir,
    rectify=True,
)

# export heatmap, detection points, box visualization
export_extra_results(
    image=image,
    regions=prediction_result["boxes"],
    heatmaps=prediction_result["heatmaps"],
    output_dir=output_dir,
)

# unload models from gpu
empty_cuda_cache()
print('all done')