• اسکریپت آپلودر با python
    #1
    Note 
    [PYTHON]
    # This is an example of uploading a file with scrapy to a server that uses uberuploader.
    # The spider logs in to the page before uploading; some webpages do not require a login in order to upload.
    # IMPORTANT: You should increase DOWNLOAD_TIMEOUT in settings.py, but at the time this snippet was written that setting was not working properly, so I recompiled the whole of scrapy with the 3-minute default changed.
    # Observations about my snippet:
    # This may not be the best possible code; please comment with corrections.
    # Could or should this be implemented in a downloader middleware or a pipeline?
    # It does not show the upload progress.
    # The MIME message creation could or should live somewhere else.

    class fileUploadSpider(CrawlSpider):
        """Spider that logs in to an upload host and uploads one zip file per DataObject.

        Flow of callbacks:
          parse          -> submits the login form found on the start page
          after_login    -> requests an upload ticket for every DataObject
          get_id_upload  -> reads the upload id and POSTs the multipart body
          lastcall       -> post-processes the page shown after the upload

        The names CrawlSpider, FormRequest, Request, HtmlXPathSelector, log,
        settings, DataObject, re and mimetypes are expected to be imported at
        file level (the imports are not part of this snippet).
        """

        name = "spidertrigger.upload"
        allowed_domains = ["uploadhost.com"]
        start_urls = [
            "http://www.uploadhost.com/url_to_login_page",
        ]

        def parse(self, response):
            """Fill in and submit the login form found in *response*."""
            # Placeholder credentials: replace them with real ones.
            return [FormRequest.from_response(
                response,
                formdata={'user': 'username', 'password': 'secret'},
                callback=self.after_login,
            )]

        def after_login(self, response):
            """Yield one upload-ticket request per DataObject, or log a failed login."""
            if "Log in to your account" in response.body:
                self.log("Login Failed", level=log.ERROR)
                return

            # The objects come from the Django ORM.
            for data in DataObject.objects.all():
                # The url must point to ubr_link_upload.php; it hands out the
                # ticket needed to upload a file. rnd_id is hard-coded here
                # but could be generated via code.
                yield Request(
                    url='http://upload.uploadhost.com/upload/ubr_link_upload.php?rnd_id=1280793046605',
                    callback=self.get_id_upload,
                    meta={'data': data},
                )

        def get_id_upload(self, response):
            """Extract the upload id from *response* and yield the upload POST request.

            The object being uploaded is read from response.request.meta['data'];
            its zip file is expected at settings.IMAGES_STORE/<data.path>.zip.
            Problems (no upload id on the page, unreadable file) are logged and
            the upload for that object is skipped.
            """
            hxs = HtmlXPathSelector(response)
            data = response.request.meta['data']
            file_name = settings.IMAGES_STORE + '/' + data.path + '.zip'

            # The upload id is the first double-quoted word in the page body.
            page_bodies = hxs.select('/html/body').extract()
            match = re.search(r'"(\w+)"', page_bodies[0]) if page_bodies else None
            if match is None:
                self.log("Upload id not found in %s" % response.url, level=log.ERROR)
                return
            upload_id = match.group(1)

            # Form fields sent along with the file.
            fields = {
                'title': data.nombre,
                'adpaid': '0',
                'private': 'no',
                'category[]': '1',
                'fontcolor': 'black',
                'helpbox': 'Font size: [size=50%]small text[/size]',
                'textarea': '',
                'fontsize': '',
                'compare': '14936',
            }
            files = {'upfile_0': file_name}

            try:
                headers, body = self.get_mime(fields, files)
            except (IOError, OSError) as exc:
                # The zip file is missing or unreadable: skip this object.
                self.log("Cannot read %s: %s" % (file_name, exc), level=log.ERROR)
                return

            print('Iniciando Request POST')
            # NOTE: the url must point to cgi-bin/ubr_upload.pl with the proper upload_id.
            yield FormRequest(
                url='http://upload.uploadhost.com/cgi-bin/ubr_upload.pl?upload_id=' + upload_id,
                method='POST',
                body=body,
                meta={'data': data},
                headers=headers,
                callback=self.lastcall,
            )

        def lastcall(self, response):
            """Post-process the page shown after an upload.

            Artificial example: prints the numeric id found in the link to the
            uploaded object, or logs an error when the link or id is absent.
            """
            hxs = HtmlXPathSelector(response)
            links = hxs.select("//div[@id='col2contentright']/p/strong/a/@href").extract()
            match = re.search(r'\d+', links[0]) if links else None
            if match is None:
                self.log("Uploaded object id not found in %s" % response.url, level=log.ERROR)
                return
            print("Success Uploaded " + match.group(0))

        @staticmethod
        def _to_bytes(value):
            """Return *value* as a byte string, encoding text as UTF-8."""
            if isinstance(value, bytes):
                return value
            return value.encode('utf-8')

        def get_mime(self, fields, files):
            """Build a multipart/form-data request body.

            fields maps form field names to text values; files maps form field
            names to paths of the files to send. Returns a tuple
            (headers, body) where headers is a dict holding the Content-Type
            header and body is a byte string.

            Raises IOError/OSError when a file cannot be opened or read.
            """
            BOUNDARY = '----------BOUNDARY_$'
            delimiter = self._to_bytes('--' + BOUNDARY)

            # Every entry of `parts` is one line of the body; lines are joined
            # with CRLF at the end so text and raw file bytes are never mixed
            # in a text buffer.
            parts = []
            for key, value in fields.items():
                parts.append(delimiter)
                parts.append(self._to_bytes('Content-Disposition: form-data; name="%s"' % key))
                parts.append(b'')
                parts.append(self._to_bytes(value))

            for key, filename in files.items():
                parts.append(delimiter)
                # The filename announced to the server is hard-coded to 'full.zip'.
                parts.append(self._to_bytes(
                    'Content-Disposition: form-data; name="%s"; filename="%s"' % (key, 'full.zip')))
                parts.append(self._to_bytes('Content-Type: %s' % self.get_content_type(filename)))
                parts.append(b'')
                # The context manager closes the file even if read() fails.
                with open(filename, 'rb') as handle:
                    parts.append(handle.read())

            # Closing delimiter followed by an empty line.
            parts.append(delimiter + b'--')
            parts.append(b'')
            body = b'\r\n'.join(parts) + b'\r\n'

            content_type = {'Content-Type': 'multipart/form-data; boundary=%s' % BOUNDARY}
            return content_type, body

        def get_content_type(self, filename):
            """Guess the MIME type of *filename*, defaulting to application/octet-stream."""
            return mimetypes.guess_type(filename)[0] or 'application/octet-stream'

    # Snippet imported from snippets.scrapy.org (which no longer works)
    # author: llazzaro
    # date : Aug 15, 2010


    [/PYTHON]

    پاسخ
    ایجاد موضوع جدید   پاسخ به موضوع  

    موضوعات مرتبط با این موضوع...
    موضوع نویسنده پاسخ بازدید آخرین ارسال
    Note تولید کننده ip در python WWWorker 1 831 29-05-2014 ساعت 13:03
    آخرین ارسال: Amirio
    Note اپلود فایل با python WWWorker 0 617 28-05-2014 ساعت 19:41
    آخرین ارسال: WWWorker
    Note چاپ تاریخ در python WWWorker 0 568 14-01-2014 ساعت 14:02
    آخرین ارسال: WWWorker
    Note خواندن فایلهای CSV در python WWWorker 0 832 07-11-2013 ساعت 09:17
    آخرین ارسال: WWWorker
    Note Python - چک کننده استحکام پسورد WWWorker 0 547 07-11-2013 ساعت 09:16
    آخرین ارسال: WWWorker
    Note ست کردن utf-8 برای فایل های python WWWorker 0 740 07-11-2013 ساعت 09:14
    آخرین ارسال: WWWorker
    Note Python Password Generator تولید کننده پسورد WWWorker 0 583 07-11-2013 ساعت 09:12
    آخرین ارسال: WWWorker
    Note تولید اعداد و کاراکتر های تصادفی در python Daniel 0 1,465 09-12-2012 ساعت 00:04
    آخرین ارسال: Daniel
    Note تغییر سایز گروهی تصاویر با اسکریپت python Daniel 0 682 08-12-2012 ساعت 23:37
    آخرین ارسال: Daniel
    Note Python xss scanner v1 Evil shadow 0 731 23-07-2012 ساعت 18:43
    آخرین ارسال: Evil shadow

    کاربرانِ درحال بازدید از این موضوع:   1 مهمان