This is still only roughly implemented, but it is a new feature I want to add: getting the text out of an existing image.
# grabscreen.py
"""Grab text from a region of the screen and turn it into audio.

Click two opposite corners of the screen area that contains the text
(normally top-left, then bottom-right).  The area is captured and saved as
``im.png``, run through Tesseract OCR, printed to the console and finally
converted to ``text.mp3`` with gTTS.  Text that is already on the clipboard
can be converted to audio as well.
"""
import os
import sys
import time
import tkinter as tk
from glob import glob

import pyscreenshot as ImageGrab
import pytesseract
import win32clipboard
from gtts import gTTS
from PIL import Image, ImageTk
from pynput.mouse import Listener

# Location of the Tesseract executable used by pytesseract.
TESSERACT_CMD = r'C:\Program Files\Tesseract-OCR\tesseract'
# How many times the text-to-speech conversion is attempted before giving up.
MAX_TRIES = 3


def grab(x, y, w, h):
    """Capture a screen region, save it, OCR it and read the text aloud.

    Despite their names, ``w`` and ``h`` are the coordinates of the second
    corner, not a width and a height: the four values form the bounding box
    handed to ``ImageGrab.grab``.  The corners may be given in any order.
    """
    # Normalise so the box is valid even if the user clicked bottom-right first.
    left, right = sorted((x, w))
    top, bottom = sorted((y, h))
    im = ImageGrab.grab(bbox=(left, top, right, bottom))
    save(im)
    ocr("im.png", mp3=1)


def save(im):
    """Write the captured image to ``im.png`` and open it in the default viewer."""
    im.save('im.png')
    os.startfile('im.png')


def _speak_with_retry(text, tries=MAX_TRIES):
    """Convert ``text`` to audio, retrying on failure.

    Returns True if an MP3 was produced and False otherwise.
    """
    # gTTS refuses empty input, so retrying would only produce a misleading
    # "connection" message.
    if not text or not text.strip():
        print("No text to convert to audio")
        return False
    for attempt in range(1, tries + 1):
        try:
            create_mp3(text)
        except Exception as err:
            # Best-effort retry: report the real error instead of hiding it.
            print(f"Attempt {attempt} of {tries} failed: {err}")
        else:
            return True
    print("Some problems with connection maybe")
    return False


def ocr(image, mp3=0):
    """Print the text recognised in ``image``; also speak it when ``mp3 == 1``.

    ``image`` is passed unchanged to ``pytesseract.image_to_string``.
    """
    pytesseract.pytesseract.tesseract_cmd = TESSERACT_CMD
    text = pytesseract.image_to_string(image)
    print(text)
    if mp3 == 1:
        _speak_with_retry(text)


def create_mp3(text, lang="en"):
    """Synthesise ``text`` with gTTS, save it as ``text.mp3`` and play it."""
    s = gTTS(text, lang=lang)
    print("Wait a second...")
    time.sleep(3)
    s.save("text.mp3")
    os.system("text.mp3")


def clip():
    """Read the current clipboard content and convert it to audio."""
    win32clipboard.OpenClipboard()
    try:
        data = win32clipboard.GetClipboardData()
    finally:
        # Always release the clipboard, even if reading it failed; otherwise
        # it stays locked by this process.
        win32clipboard.CloseClipboard()
    _speak_with_retry(data)


# State shared between the mouse listener callback and start().
click1 = 0        # 0 until the first corner has been clicked
x1 = 0            # first corner, x coordinate
y1 = 0            # first corner, y coordinate
selection = None  # (x1, y1, x2, y2) once both corners are known


def on_click(x, y, button, pressed):
    """Mouse callback: remember the first click, finish on the second.

    Returns False after the second click, which tells the pynput listener to
    stop.  The capture itself is done by start() once the listener has
    ended, so no slow work (screenshot, OCR, network) runs inside the
    callback.
    """
    global click1, x1, y1, selection
    if not pressed:
        return
    if click1 == 0:
        x1, y1 = x, y
        click1 = 1
        return
    selection = (x1, y1, x, y)
    return False


def start():
    """Close the window, wait for two clicks, then capture and read the area."""
    global click1, selection
    root.destroy()
    click1 = 0
    selection = None
    print("Click once on top left and once on bottom right")
    with Listener(on_click=on_click) as listener:
        listener.join()
    if selection is not None:
        grab(*selection)


def lab_print(event):
    """Click handler of the preview image: OCR the first PNG, without audio."""
    ocr(slides[0], mp3=0)


root = tk.Tk()
root.title("GRAUTESC 2 - Text to Audio APP")
root.geometry("600x500")

but = tk.Button(root, text="Grab to audio", command=start,
                width=20, height=3, bg="gold")
but.pack()
butclip = tk.Button(root, text="Audio from clipboard", command=clip,
                    width=20, height=3, bg="gold")
butclip.pack()
# TODO: add a Help button.

# SLIDES: preview the first PNG found in the working directory, if there is one.
slides = glob("*.png")
if slides:
    image = Image.open(slides[0])
    print(slides[0])
    # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
    image = image.resize((200, 400), Image.LANCZOS)
    img = ImageTk.PhotoImage(image=image)
    label = tk.Label(root, image=img)
    label.pack()
    label.bind("<Button-1>", lab_print)

root.mainloop()