# Prepare the image and the text.
# `preprocess`, `clip`, `model` and `device` are expected to be defined earlier
# in the file (presumably via `clip.load(...)` -- verify against the setup code).
# Open the image in a context manager so the file handle is released as soon
# as the pixel data has been converted into a tensor.
with Image.open("path_to_image.jpg") as source_image:
    # unsqueeze(0) adds a leading dimension of size 1 before moving to `device`.
    image = preprocess(source_image).unsqueeze(0).to(device)
text = clip.tokenize(["a description of what you are looking for"]).to(device)

# Compute the features for both inputs. Gradient tracking is disabled because
# the encoders are only run forward here.
# NOTE(review): the original comment also mentions comparing the features;
# that step is not visible in this excerpt.
with torch.no_grad():
    image_features = model.encode_image(image)
    text_features = model.encode_text(text)