The Code-Bin
Links
Home
Add your code!
All Listings
About
Latest Entry
Featured Scripts
Author's Website
Latest Entries
FFMPEG Thumbnail Scr...
PHP, 0.8KB
Jul. 29, 10:24pm
John
Z80 Assembler, 190 bytes
Feb. 17, 3:36am
John
Z80 Assembler, 176 bytes
Sep. 13, 2:19am
John
Z80 Assembler, 77 bytes
Sep. 13, 2:18am
John
Z80 Assembler, 209 bytes
Sep. 13, 2:17am
scrapestuff
Posted by: tyler | September 25, 2009 @ 4:46pm
Python Code
[
Download
]
# Small Tk front end: the "Scrape" button downloads the configured pages with
# lxml, pulls ip/port values out of their <td> cells and lists them.

try:
    from Tkinter import *  # Python 2 module name
except ImportError:
    from tkinter import *  # the module is spelled lower-case on Python 3
from lxml.html import parse
import re

# Working record for the proxy currently being assembled ('ip' / 'port' keys).
curProxy = {}
# Every proxy recorded so far; grows across repeated scrapes.
proxies = []

# Upper bound on the number of proxies inserted into the list box per scrape.
MAX_LISTED = 20

# NOTE: the unescaped "." inside the negative lookahead matches any character
# except a newline, so each pattern only succeeds when the number is directly
# followed by the end of the cell text (or a newline).  The pattern text is
# kept exactly as originally written so the same cells are recognised.
# NOTE(review): PORT_PATTERN therefore skips five-digit ports - confirm that
# this is intended before widening it.
IP_PATTERN = re.compile(r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}(?!(-|.|:))")
PORT_PATTERN = re.compile(r"\d{2,4}(?!(-|.|:))")


class App:
    """Window with a "Scrape" button and a scrollable list of proxies."""

    def __init__(self, master):
        """Build the button, list box and scrollbar inside *master*."""
        frame = Frame(master)
        frame.pack()

        self.hi_there = Button(frame, text="Scrape", command=self.scrape)
        self.hi_there.pack(side=LEFT)

        # The list box and scrollbar are wired to each other so that either
        # one moving updates the other.
        self.scrollbar = Scrollbar(frame, orient=VERTICAL)
        self.listbox = Listbox(frame, yscrollcommand=self.scrollbar.set)
        self.scrollbar.config(command=self.listbox.yview)
        self.scrollbar.pack(side=RIGHT, fill=Y)
        self.listbox.pack()

    def scrape(self):
        """Download each source page, record ip/port pairs and list them.

        Cells are scanned in document order: a cell matching IP_PATTERN
        sets the current ip, and a cell matching PORT_PATTERN sets the port
        and records the pair in the module-level ``proxies`` list.  At most
        MAX_LISTED recorded proxies are then inserted into the list box as
        ``ip:port`` strings.
        """
        urls = [
            'http://www.proxy4free.com/page1.html',
            # Further sources that were commented out in the original script:
            # 'http://www.proxy4free.com/page2.html',
            # 'http://proxynext.com/proxylist1.php',
            # 'http://proxynext.com/proxylist2.php',
        ]
        for url in urls:
            doc = parse(url).getroot()
            for td in doc.cssselect('td'):
                text = td.text_content()
                if IP_PATTERN.match(text):
                    curProxy['ip'] = text
                elif PORT_PATTERN.match(text):
                    curProxy['port'] = text
                    if 'ip' not in curProxy:
                        # A port-like cell before any ip cell cannot form a
                        # usable entry; skipping avoids a KeyError below.
                        continue
                    print(curProxy)
                    # Store a snapshot: appending curProxy itself would make
                    # every entry alias the same dict, so all rows would
                    # show the last proxy scraped.
                    proxies.append(dict(curProxy))

        # Slicing (rather than indexing 0..19) copes with fewer results.
        for proxy in proxies[:MAX_LISTED]:
            self.listbox.insert(END, proxy['ip'] + ':' + proxy['port'])


root = Tk()
app = App(root)
root.mainloop()
Syntax Highlighting
[
Open in new window
]
Author Comments
none
Rating
4.43 / 8
86 Votes
http://codebin.yi.org/394
page generated in 0.00 seconds