اسکریپت آپلودر با python
#1
Note 
[PYTHON]
# This is an example of file uploading using scrapy to a server that uses uberuploader.
# The spider logs in to the page before uploading; some sites do not require a login in order to upload.
# IMPORTANT: You should increase DOWNLOAD_TIMEOUT in settings.py. At the time this snippet was written that setting was not working properly, so I rebuilt Scrapy with the 3-minute default changed.
# Observations about my snippet:
# This may not be the best code; please comment with corrections.
# Could (or should) this be implemented in a downloader middleware or a pipeline?
# It does not show the upload progress.
# The MIME message creation could (or should) live somewhere else.

class fileUploadSpider(CrawlSpider):
    """Log in to uploadhost.com and upload one zip archive per DataObject.

    Request flow: ``parse`` submits the login form, ``after_login`` asks
    ``ubr_link_upload.php`` for an upload id once per object,
    ``get_id_upload`` POSTs the multipart body to ``ubr_upload.pl`` and
    ``lastcall`` reads the id of the uploaded item from the result page.

    The snippet shows no import statements: ``re``, ``mimetypes``, the
    Scrapy names (``CrawlSpider``, ``Request``, ``FormRequest``,
    ``HtmlXPathSelector``, ``log``, ``settings``) and the Django model
    ``DataObject`` have to be provided by the surrounding module.
    """

    # NOTE(review): no crawl rules are defined and parse() is overridden, so
    # a plain spider base class would presumably be enough -- confirm against
    # the Scrapy version in use before changing the base class.
    name = "spidertrigger.upload"
    allowed_domains = ["uploadhost.com"]
    start_urls = [
        "http://www.uploadhost.com/url_to_login_page",
    ]

    def parse(self, response):
        """Submit the login form found on the start page.

        Returns a one-element list holding the login request; its response
        is handled by ``after_login``.
        """
        return [FormRequest.from_response(
            response,
            formdata={'user': 'username', 'password': 'secret'},
            callback=self.after_login,
        )]

    def after_login(self, response):
        """Yield one upload-ticket request per stored ``DataObject``.

        Logs an error and yields nothing when the response still contains
        the login prompt.
        """
        if "Log in to your account" in response.body:
            self.log("Login Failed", level=log.ERROR)
            return

        # The objects come from the Django ORM.
        for data in DataObject.objects.all():
            # The URL must point to ubr_link_upload.php; it hands out the
            # ticket needed to upload a file. rnd_id is hard-coded here but
            # could be generated in code.
            yield Request(
                url='http://upload.uploadhost.com/upload/ubr_link_upload.php?rnd_id=1280793046605',
                callback=self.get_id_upload,
                meta={'data': data},
                # Every object requests the very same URL, so the duplicate
                # filter must be bypassed or only the first one is fetched.
                dont_filter=True,
            )

    def get_id_upload(self, response):
        """Extract the upload id from the ticket page and POST the file.

        Yields the multipart upload request, or nothing (after logging an
        error) when the upload id cannot be found or the zip file cannot
        be read.
        """
        hxs = HtmlXPathSelector(response)
        data = response.request.meta['data']
        file_name = settings.IMAGES_STORE + '/' + data.path + '.zip'

        # The upload id is the first double-quoted word inside <body>.
        bodies = hxs.select('/html/body').extract()
        match = re.search(r'"(\w+)"', bodies[0]) if bodies else None
        if match is None:
            self.log("Upload id not found in %s" % response.url,
                     level=log.ERROR)
            return
        upload_id = match.group(1)

        # Form fields sent along with the file.
        fields = {
            'title': data.nombre,
            'adpaid': '0',
            'private': 'no',
            'category[]': '1',
            'fontcolor': 'black',
            'helpbox': 'Font size: [size=50%]small text[/size]',
            'textarea': '',
            'fontsize': '',
            'compare': '14936',
        }
        files = {'upfile_0': file_name}

        try:
            headers, body = self.get_mime(fields, files)
        except IOError as err:
            # The zip file is required to exist; skip this object otherwise.
            self.log("Cannot read %s: %s" % (file_name, err),
                     level=log.ERROR)
            return

        print('Iniciando Request POST')
        # The URL must point to cgi-bin/ubr_upload.pl with the upload_id
        # obtained above.
        yield FormRequest(
            url='http://upload.uploadhost.com/cgi-bin/ubr_upload.pl?upload_id=' + upload_id,
            method='POST',
            body=body,
            meta={'data': data},
            headers=headers,
            callback=self.lastcall,
        )

    def lastcall(self, response):
        """Post-process the upload result page.

        Artificial example: takes the first link under the
        ``col2contentright`` div and prints the first run of digits in it
        as the id of the uploaded object. Logs an error when the link or
        the digits are missing.
        """
        hxs = HtmlXPathSelector(response)
        links = hxs.select(
            "//div[@id='col2contentright']/p/strong/a/@href").extract()
        if not links:
            self.log("Uploaded link not found in %s" % response.url,
                     level=log.ERROR)
            return

        match = re.search(r'\d+', links[0])
        if match is None:
            self.log("No numeric id in uploaded link %s" % links[0],
                     level=log.ERROR)
            return

        print("Success Uploaded " + match.group(0))

    def get_mime(self, fields, files):
        """Build a multipart/form-data request body.

        ``fields`` maps form field names to text values (each value is
        encoded as UTF-8); ``files`` maps form field names to paths of
        files whose raw bytes are embedded. Every file part is announced
        with the fixed filename ``full.zip``.

        Returns ``(headers, body)`` where ``headers`` is a dict holding
        the Content-Type header with the boundary and ``body`` is the
        encoded payload. Raises ``IOError`` when a file cannot be opened.

        NOTE(review): parts are joined as plain strings, so this relies on
        Python 2 byte-string semantics; the original author also warned it
        may have problems with binary data.
        """
        BOUNDARY = '----------BOUNDARY_$'
        CRLF = '\r\n'
        parts = []

        for key, value in fields.items():
            parts.append('--' + BOUNDARY + CRLF)
            parts.append(
                'Content-Disposition: form-data; name="%s"' % key + CRLF)
            parts.append(CRLF)
            parts.append(value.encode('utf-8') + CRLF)

        for key, path in files.items():
            parts.append('--' + BOUNDARY + CRLF)
            parts.append(
                'Content-Disposition: form-data; name="%s"; filename="%s"'
                % (key, 'full.zip') + CRLF)
            parts.append(
                'Content-Type: %s' % self.get_content_type(path) + CRLF)
            parts.append(CRLF)
            # Close the handle deterministically instead of leaking it.
            with open(path, 'rb') as handle:
                parts.append(handle.read() + CRLF)

        # Closing delimiter, written once after all parts.
        parts.append('--' + BOUNDARY + '--' + CRLF)
        parts.append(CRLF)

        body = ''.join(parts)
        content_type = {
            'Content-Type': 'multipart/form-data; boundary=%s' % BOUNDARY,
        }
        return content_type, body

    def get_content_type(self, filename):
        """Guess the MIME type from ``filename``.

        Falls back to ``application/octet-stream`` when no type is guessed.
        """
        return mimetypes.guess_type(filename)[0] or 'application/octet-stream'

# Snippet imported from snippets.scrapy.org (which no longer works)
# author: llazzaro
# date : Aug 15, 2010


[/PYTHON]

پاسخ
ایجاد موضوع جدید   پاسخ به موضوع  

موضوعات مرتبط با این موضوع...
موضوع نویسنده پاسخ بازدید آخرین ارسال
Note تولید کننده ip در python WWWorker 1 646 29-05-2014 ساعت 13:03
آخرین ارسال: Amirio
Note اپلود فایل با python WWWorker 0 495 28-05-2014 ساعت 19:41
آخرین ارسال: WWWorker
Note چاپ تاریخ در python WWWorker 0 442 14-01-2014 ساعت 14:02
آخرین ارسال: WWWorker
Note خواندن فایلهای CSV در python WWWorker 0 511 07-11-2013 ساعت 09:17
آخرین ارسال: WWWorker
Note Python - چک کننده استحکام پسورد WWWorker 0 436 07-11-2013 ساعت 09:16
آخرین ارسال: WWWorker
Note ست کردن utf-8 برای فایل های python WWWorker 0 423 07-11-2013 ساعت 09:14
آخرین ارسال: WWWorker
Note Python Password Generator تولید کننده پسورد WWWorker 0 468 07-11-2013 ساعت 09:12
آخرین ارسال: WWWorker
Note تولید اعداد و کاراکتر های تصادفی در python Daniel 0 1,106 09-12-2012 ساعت 00:04
آخرین ارسال: Daniel
Note تغییر سایز گروهی تصاویر با اسکریپت python Daniel 0 596 08-12-2012 ساعت 23:37
آخرین ارسال: Daniel
Note Python xss scanner v1 Evil shadow 0 632 23-07-2012 ساعت 18:43
آخرین ارسال: Evil shadow

کاربرانِ درحال بازدید از این موضوع:   1 مهمان