[PyQt] PyQt5.6: issues with QBasicTimer when running a script using PyQtWebKitWidgets

Dave Sampson samper.d at gmail.com
Mon Jan 8 12:49:51 GMT 2018


Barry,

Thanks for the reply and assistance in solving this issue.

As you suggested, I tried to remove all dependencies for Anaconda and 
Spyder, and started to hit some more brick walls with versionitis of PyQt5.

So here we go.

  * I created a virtualenv right on my Ubuntu system
  * When I installed various packages using PIP I could not install
    PyQt5.6 like what I had in anaconda, I had to choose at minimum 5.7
    so dove all in with 5.9. And then the fun really started.
  * PyQt 5.6 to 5.9 conversion led to broken imports. I managed to fix
    those and handle cases for differences between 5.6 and 5.9 using
    try/except pairs (see new BROKEN script below)
  * QWebPage changed locations and namespace (some docs here:
    http://doc.qt.io/qt-5/qtwebenginewidgets-qtwebkitportingguide.html)
  * Then of course the PyQt C library changed so I had to find new
    import modules and new sub-modules.
  * Then PyQt 5.9 introduces some asynchronous functions that expect
    call back functions. I really don't understand call back functions,
    but I tried creating one anyhow.
  * The new "pip list" for the script below includes the following
      o """beautifulsoup4==4.6.0
        bs4==0.0.1
        certifi==2017.11.5
        chardet==3.0.4
        idna==2.6
        pip==9.0.1
        pkg-resources==0.0.0
        PyQt5==5.9.2
        QtPy==1.3.1
        requests==2.18.4
        setuptools==38.2.5
        sip==4.19.6
        urllib3==1.22
        wheel==0.30.0"""

revised script with new issues:

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
#  Copyright (c) Tue Dec 19 21:00:56 2017, David Sampson
#  (samper.d at gmail.com)
#
#  license: GNU LGPL
#
#  This library is free software; you can redistribute it and/or
#  modify it under the terms of the GNU Lesser General Public
#  License as published by the Free Software Foundation; either
#  version 2.1 of the License, or (at your option) any later version.


"""
Created on Tue Dec 19 21:00:56 2017

@author: sampson
"""

# Imports
import sys
import requests
#from config import *
from bs4 import BeautifulSoup
from PyQt5.QtCore import QThread, QBasicTimer, QCoreApplication
# Pick whichever web back end this PyQt5 installation provides and remember
# the choice in ``pyqt`` so the rest of the script can branch on it.
try:
    # QtWebKit back end (the PyQt 5.6 setup this script started from).
    from PyQt5.QtWebKitWidgets import QWebPage
    pyqt = "5.6"
except ImportError:
    # QtWebKit is not available: fall back to QtWebEngine and alias its page
    # class so the code below can keep using the name QWebPage.
    from PyQt5.QtWebEngineWidgets import QWebEnginePage as QWebPage
    pyqt = "5.9"

from PyQt5.QtWidgets import QApplication

# Variables
# Targets tried earlier during development, kept for reference:
#   url = 'http://webscraping.com'
#   url = 'http://www.amazon.com'
#   TickerList = "AEU.UN, BMO, BNS"
Ticker = "BNS"
TickerList = "AEU.UN, BMO"
# Page fetched and rendered by main(); edit to point at a real site.
url = "http://www.JsEnabledSite.com"

# Constants


# Main function




class Render(QWebPage):
    """Render an HTML document in a Qt web page and capture the result.

    Constructing an instance blocks: it reuses (or creates) the Qt
    application object, feeds ``html`` to the page and runs the Qt event
    loop until the page has finished loading and the rendered markup has
    been stored in ``self.html``.

    The module-level ``pyqt`` flag selects the code path:

    * ``"5.6"`` - QtWebKit, where ``mainFrame().toHtml()`` returns the
      markup directly.
    * ``"5.9"`` - QtWebEngine, where ``toHtml()`` is asynchronous and
      delivers the markup to a callback.
    """

    def __init__(self, html):
        """Load ``html`` and block until rendering has finished.

        :param html: raw HTML source to render.
        """
        self.html = None
        # The application object is looked up / created before the page
        # base class is initialised (same order as the original script).
        self.app = QCoreApplication.instance()
        if self.app is None:
            self.app = QApplication(sys.argv)
            print("Creating new QApplication instance")
        else:
            print("using Existing instance of QApplication: %s" % str(self.app))
        QWebPage.__init__(self)

        self.timer = QBasicTimer()
        self.timer.start(5000, self)
        self._report_timer()

        self.loadStarted.connect(self._loadStarted)
        self.loadProgress.connect(self._loadProgress)
        self.loadFinished.connect(self._loadFinished)

        # The QtWebKit page exposes setHtml() on its main frame, while the
        # QtWebEngine page has it on the page object itself.
        if pyqt == "5.6":
            self.mainFrame().setHtml(html)
        else:
            self.setHtml(html)
        self.app.exec()

    def _report_timer(self):
        """Print whether ``self.timer`` is currently running."""
        if self.timer.isActive():
            print("timer is active")
            print("timer ID: %s" % str(self.timer.timerId()))
        else:
            print("timer is inactive")

    def _loadFinished(self, result):
        """Slot for ``loadFinished``: capture the rendered HTML, then stop.

        :param result: value passed by the ``loadFinished`` signal (unused).
        """
        if pyqt == "5.9":
            print("fetching HTML")
            # QWebEnginePage.toHtml() returns nothing; it hands the markup
            # to the callback later. The event loop therefore has to keep
            # running - shutdown happens in _store_html(), not here.
            self.toHtml(self._store_html)
            return

        if pyqt == "5.6":
            self.html = self.mainFrame().toHtml()
            print("Finihsed function passed")
        else:
            print("you are running pyqt version:  unknown")
        self._finish()

    def _store_html(self, html):
        """Callback for the asynchronous ``toHtml()``: keep the markup, stop.

        :param html: rendered page markup delivered by QtWebEngine.
        """
        self.html = html
        print("HTML Fetched")
        self._finish()

    def _finish(self):
        """Stop the timer and leave the event loop started in ``__init__``."""
        print("Load Finished")
        self.timer.stop()
        self._report_timer()
        QApplication.instance().quit()

    def _loadStarted(self):
        """Slot for ``loadStarted``: report that loading has begun."""
        print("Page is loading content")

    def _loadProgress(self, progress):
        """Slot for ``loadProgress``: report the progress value.

        :param progress: value passed by the ``loadProgress`` signal.
        """
        print("Page is loading: %s" % progress)

    def get_html(self, result):
        """Print the stored HTML and ``result``, then return ``result``.

        :param result: value to echo back to the caller.
        :returns: ``result`` unchanged.
        """
        print(self.html)
        print(result)
        return result



def main():
    """Script entry point: announce the configured URL, then render it."""
    target = url
    print("url: %s" % target)
    get_page(target)


def get_page(Url, timeout=30):
    """Fetch ``Url`` and return its HTML after rendering it in a Qt web page.

    The raw page source is downloaded with ``requests`` and handed to
    ``Render``, which blocks until rendering is done. The Qt application
    object is obtained inside ``Render``, so none is created here.

    :param Url: address of the page to fetch.
    :param timeout: seconds to wait for the HTTP server before ``requests``
        raises a timeout error. Pass ``None`` to wait indefinitely, which
        is what this function did before the parameter existed.
    :returns: the value of ``Render(...).html`` - the rendered markup, or
        ``None`` if nothing was captured.
    """
    # Without a timeout a stalled server would hang this call forever.
    SourceHtml = requests.get(Url, timeout=timeout).text

    RenderedHtml = Render(SourceHtml).html

    print("Finished")
    print(RenderedHtml)
    return RenderedHtml


# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()


On 01/01/2018 03:32 AM, Barry wrote:
> What happens if you run your script outside of spyder from the bash 
> prompt?
> Maybe it's Spyder that is breaking things because it's in control, not 
> your code.
>
> Barry
>
> On 1 Jan 2018, at 04:02, Dave Sampson <samper.d at gmail.com 
> <mailto:samper.d at gmail.com>> wrote:
>
>> Hey Folks,
>>
>> A special thanks to anyone who takes time to read about my current 
>> situation and provide part of or all of the solution.
>>
>> The Issue:
>>
>> ==========
>>
>> As part of a larger project, I am wanting to develop a function, 
>> class or module using python that will accept a URL of a javascript 
>> enabled website and return the page's HTML contents generated by 
>> those JS scripts. Please see the end of this post for a partially 
>> functioning script.
>>
>> I used this reference as my initial source that got me using the PyQt 
>> bindings. 
>> https://impythonist.wordpress.com/2015/01/06/ultimate-guide-for-scraping-javascript-rendered-web-pages/
>>
>> Much of the documentation refers to PyQt4, I am using PyQt5.6, which 
>> is sometimes not in line with more recent PyQt5 documentation. So 
>> there has been lots of trial and error along the way.
>>
>> I am able to run the below script once, however upon a second run of 
>> the script I keep running up against this error: "QBasicTimer::start: 
>> Stopping previous timer failed. Possibly trying to stop from a 
>> different thread".
>>
>> I will need to scrape at minimum 3 separate URL's each time this 
>> module/class is called. I assume that once I get the solution to make 
>> two successful runs then running 10's or 100's should also work.
>>
>> This module will be called from another control module. I have little 
>> interest in persistent objects like QApplication, between the 
>> different calls.
>>
>> At this point I am getting a bit frustrated that I can't just send a 
>> function a URL and receive back HTML in rapid succession. If JS was 
>> not involved I would not have this issue. Using requests, urllib and 
>> BeautifulSoup for walking through the HTMLDOC are all working fine.
>>
>> Please note I am not a C programmer so being able to abstract 
>> concepts from C documentation to python is sometimes a challenge. I 
>> would consider myself an intermediate python scripter who learned on 
>> 2.7 and now consistently use 3.6 for my projects. Pardon if some 
>> syntax smells of 2.x. I try mostly to follow PEP8 once I have solid 
>> code in place.
>>
>> Also note that I am not a classically trained developer, I am 
>> geographer who designs Geographic Information Systems (GIS), so the 
>> concept of classes are still a bit abstract. I understand analogies 
>> of classes for describing cars, robots and a pizza making process, 
>> but leveraging PyQT for web scraping and creating GUI objects that 
>> will never render anything visually is a bit confusing. Maybe someday 
>> urllib will process JS. Generally I use python for automation of data 
>> management tasks and processes in the geomatics domain. So please 
>> assume I need some explanation if your response is "remember when 
>> using classes do this and that", I likely never knew what you will 
>> refer to. Assume I know nothing about classes and the lineage of 
>> inheritance of PyQt objects. (smile)
>>
>> The environment:
>>
>> ============
>>
>> * Ubuntu 16.04 LTS
>>
>> * Anaconda navigator 1.6.11
>>
>>     * Python 3 dedicated environment
>>
>> * Python 3.6.3 (64 bit)
>>
>> * Spyder 3.2.5
>>
>> * PyQt 5.6
>>
>> * Qt 5.6.2
>>
>>
>> Sources:
>>
>> =======
>>
>> These are some of the sources I have used to try and solve this 
>> problem. I did not capture all of the sources used as many other 
>> resources point back to these sources:
>>
>> * 
>> https://stackoverflow.com/questions/6180293/pyqt-timers-cannot-be-started-from-another-thread
>>
>> * 
>> https://forum.qt.io/topic/13459/timers-cannot-be-stopped-from-another-thread-but-how-do-i-stop-start-timer-in-thread
>>
>> * https://github.com/spyder-ide/spyder/issues/974
>>
>> 6. [Web Scraping Primer using Webkit 
>> (https://impythonist.wordpress.com/2015/01/06/ultimate-guide-for-scraping-javascript-rendered-web-pages/)]
>> 7. [How to run PyQt applications within Spyder 
>> (https://github.com/spyder-ide/spyder/wiki/How-to-run-PyQt-applications-within-Spyder)]
>> 8. [PyQT code can not run twice when using Spyder IDE 
>> (https://github.com/spyder-ide/spyder/issues/2970)]
>>
>>
>> Problem solving approaches:
>>
>> ==================
>>
>>  1. The first issue I came across are related to running PyQT object
>>     like QApplication within the Spyder IDE since there is already a
>>     QApplication object created by Spyder. So this problem led to a
>>     few different approaches.
>>      1. Moved app = QApplication() outside of the __init__() function
>>         of the class
>>      2. Added a conditional check for QApplication.instance() to
>>         either reuse the existing object or create one if missing.
>>         This depends on if you run the code inside or outside of a QT
>>         environment. For example running at the command line should
>>         create a new instance, where running in Spyder reuses the
>>         existing instance
>>      3. Created the App object in another module that imports this
>>         one. The same result is achieved since the namespace of the
>>         imported module perpetuates the same namespace.
>>      4. I have even tried overriding various names in the namespace
>>         without success. I understand this is dangerous, but wanted
>>         to try and isolate what object is creating the timer.
>>      5. Resetting the whole namespace loses some valuable settings
>>         found in "from config.py import *". for the purpose of the
>>         script below that line can remain commented
>>      6. Moving the creation of the App object outside the class
>>         caused some other issues when the page was loaded and the
>>         app.quit() function is called. So I tried grabbing the app
>>         object created outside the class, but I could never grab onto
>>         it to close it, so it would hang. I wondered if I could then
>>         call a function external to the class
>>  2. So after trying to attack the persistent QApplication Object I
>>     went after trying to solve the QBasicTimer issue.
>>      1. I tried killing the timer with app.killTimer() before the
>>         app.quit() is called once the page is loaded. However, no
>>         timer was found. this suggests that some other object was
>>         creating a timer. I could not track that down.
>>      2. Although the self.app object created by QApplication had a
>>         app.killTimer() function there were no start() or stop()
>>         timer methods, and I could not find out what value was left
>>         on the timer. So I tried creating a timer object within the
>>         class for the app object. This allowed me to start and stop
>>         timers (set to 5 seconds), however even after verifying that
>>         the timer was created, started, killed and then no longer
>>         existed, I still get the QBasicTimer error.
>>      3. In some cases, if I killed the timer too early the program
>>         would hang. Killing the timer after the app.quit() function
>>         rendered no timer. So did I create a timer that still ran
>>         after the app died? Or is the timer somewhere else?
>>  3. The next area I started to explore was the concept of creating
>>     threads with QThreads. The sources I read were giving this
>>     concept high hopes. I soon was reminded I am horrible at reading
>>     C documentation and quickly gave up. Besides, even creating
>>     threads may bring those persistent objects into my executed
>>     namespace.
>>  4. External processes was my next approach.
>>      1. Could I use something like "from subprocess import call" and
>>         then I could just call(["python", "this-script.py",
>>         "http://this.url.com"]) as a subprocess. Thinking that once
>>         the subprocess would run then the process would die along
>>         with any timers. I could also create the QApplication object
>>         in a jail and not pollute my namespace. I could not even get
>>         simple linux commands like "ls" to return anything
>>         meaningful, let alone a "return" value from the script.
>>         result = fail.
>>  5. Why use Anaconda/spyder at all? I came across these types of
>>     responses in various posts to similar problems. It is a fair
>>     enough question, so let me try to address some points to consider.
>>      1. I work in government and think it is high time that any
>>         public servant who has an idea to automate something in their
>>         workflow should have easy tools to help. Anaconda could be
>>         this tool.
>>      2. If the final solution is to strip out everything except for a
>>         text editor (vim) and command line (BASH) then I will take
>>         it. At the end of the day I need something that works. I tend
>>         to work in these types of stripped down environments through
>>         SSH on servers anyways. But perhaps there is an alternative
>>         solution.
>>      3. I have been using Python and BASH for years and like the
>>         lightweight approach, however it is not for the faint of
>>         heart. I just recently started using Anaconda as a general
>>         data wrangling platform and I like its elegance. I have been
>>         promoting it with other non developers and govies (aka public
>>         servants) playing in the data space. I feel the platform is
>>         and could be a great equalizer. I would feel pretty silly
>>         recommending this platform if it could not support simple
>>         web-scraping. result = fail!
>>      4. I also just learned about Spyder, through using Anaconda.
>>         Spyder provides me what I need and emulates my lightweight
>>         setup of a text editor, Command Line and File browser setup.
>>         Also I like the debug tools, being able to see all the
>>         variables in the namespace, create stop points and control
>>         step throughs. All of these things I never had at the command
>>         line. and Ipython is quite intriguing. I think I am just at
>>         the start of another journey towards efficiency.
>>      5. I am a big believer that tech tools should not influence or
>>         impede workflows. In theory this should all work in anaconda
>>      6. I find setting up individual virtual environments tedious for
>>         the various python projects I have on the go.
>>
>> That is a general overview of my battles with PyQt for web scraping 
>> over the past couple of weeks. So now I turn to the mailing list of 
>> creative, professional and motivated PyQt users to let me know what 
>> obvious solution I am missing.
>>
>> So now I present to you fine and dedicated list viewers my script for 
>> consideration. I look forward to learning what solution(s) come forward.
>>
>> I am prepared to be humbled.
>>
>>
>> The Script:
>>
>> ========
>>
>> This module should be able to run once in either the Spyder IPython 
>> Console or an Anaconda terminal window. When you run it a second time 
>> you should get the error. I have left many other code snippets intact 
>> using comments to let you know some of the other approaches I have taken.
>>
>> #!/usr/bin/env python3
>> # -*- coding: utf-8 -*-
>> #
>> #  Copyright (c) Tue Dec 19 21:00:56 2017, David Sampson 
>> (samper.d at gmail.com)
>> #
>> #  license: GNU LGPL
>> #
>> #  This library is free software; you can redistribute it and/or
>> #  modify it under the terms of the GNU Lesser General Public
>> #  License as published by the Free Software Foundation; either
>> #  version 2.1 of the License, or (at your option) any later version.
>>
>>
>> """
>> Created on Tue Dec 19 21:00:56 2017
>>
>> @author: sampson
>> """
>>
>> # Imports
>> import sys
>> import requests
>> #from config import *
>> from PyQt5.QtCore import QThread, QBasicTimer, QCoreApplication
>> from PyQt5.QtWebKitWidgets import QWebPage
>> from PyQt5.QtWidgets import QApplication
>>
>> # Variables
>> #url = 'http://webscraping.com'
>> #url='http://www.amazon.com'
>>
>> ### EDIT THIS###
>> url="http://www.JsEnabledSite.com"
>>
>> # Constants
>>
>>
>> # Main function
>> def main():
>>     get_page(url)
>>
>>
>>
>> class Render(QWebPage):
>>     """Render HTML with PyQt5 WebKit."""
>>     def __init__(self, html):
>>         self.html = None
>>         self.app = QCoreApplication.instance()
>>         if self.app is None:
>>             self.app = QApplication(sys.argv)
>>             print("Creating new QApplication instance")
>>         else:
>>             print("using Existing instance of QApplication: %s" % 
>> str(self.app))
>>         QWebPage.__init__(self)
>>         self.timer = QBasicTimer()
>>         self.timer.start(5000, self)
>>         if self.timer.isActive() == True:
>>             print("timer is active")
>>             print("timer ID: %s" % str(self.timer.timerId()))
>>         else:
>>             print("timer is inactive")
>>         self.loadFinished.connect(self._loadFinished)
>>         self.mainFrame().setHtml(html)
>>         self.app.exec()
>>
>>
>>     def _loadFinished(self, result):
>>         self.html = self.mainFrame().toHtml()
>>         print("Load Finished")
>>         self.timer.stop()
>>         if self.timer.isActive() == True:
>>             print("timer is active")
>>             print("timer ID: %s" % str(self.timer.timerId()))
>>         else:
>>             print("timer is inactive")
>>         QApplication.instance().quit()
>>         #self.app.quit()
>>
>>
>>
>>
>> def get_page(Url):
>>     """
>>     This module accepts a URL and returns a page with rendered 
>> JavaScript
>>     using PyQt5 Webkit.Stock
>>     """
>>     #global app
>>     # get the raw HTML
>>     SourceHtml = requests.get(Url).text
>>
>>     #app = QApplication(sys.argv)
>>     #app = QApplication.instance()
>>     #if app is None:
>>     #   app = QApplication(sys.argv)
>>     #else:
>>     #   print("using Existing instance of QApplication: %s" % str(app))
>>     RenderedHtml = Render(SourceHtml).html
>>     #app.exec_()
>>
>>     #sys.exit(app.exec())
>>
>>     #print(RenderedHtml)
>>     print("Finished")
>>     return RenderedHtml
>>
>>
>> if __name__ == '__main__':
>>     main()
>>     #get_page(url)
>>
>>
>> _______________________________________________
>> PyQt mailing list PyQt at riverbankcomputing.com 
>> <mailto:PyQt at riverbankcomputing.com>
>> https://www.riverbankcomputing.com/mailman/listinfo/pyqt

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://www.riverbankcomputing.com/pipermail/pyqt/attachments/20180108/df881d21/attachment-0001.html>


More information about the PyQt mailing list