Skip to content

Commit

Permalink
use uchardet
Browse files Browse the repository at this point in the history
  • Loading branch information
wang-bin committed Nov 2, 2015
1 parent 479e57a commit 1439d55
Show file tree
Hide file tree
Showing 12 changed files with 126 additions and 120 deletions.
6 changes: 3 additions & 3 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
[submodule "contrib/libchardet"]
path = contrib/libchardet
url = https://github.com/wang-bin/libchardet.git
[submodule "contrib/capi"]
path = contrib/capi
url = https://github.com/wang-bin/capi.git
[submodule "tools/build_ffmpeg"]
path = tools/build_ffmpeg
url = https://github.com/wang-bin/build_ffmpeg.git
[submodule "contrib/uchardet"]
path = contrib/uchardet
url = https://github.com/BYVoid/uchardet.git
1 change: 1 addition & 0 deletions QtAV.pro
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ contains(QT_CONFIG, opengl):!no-gl:!no-widgets {
# no-xxx can set in $$PWD/user.conf
!no-openal: OptionalDepends *= openal
!no-libass: OptionalDepends *= libass
!no-uchardet: OptionalDepends *= uchardet
win32:macx:!android:!winrt:!no-portaudio: OptionalDepends *= portaudio
win32 {
!no-xaudio2: OptionalDepends *= xaudio2
Expand Down
3 changes: 1 addition & 2 deletions config.tests/gentest.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
SCRIPT_DIR=${0%/*}
. $SCRIPT_DIR/../scripts/functions.sh

help_post(){
echo "This will create a test for $1. You may change the default value: \"#include <$1.h>\" in $1/main.cpp and \"LIBS += -l$1\" in $1/$1.pro"
Expand All @@ -24,7 +23,7 @@ LIBS += -l$NAME
EOF

YEAR=`date +%Y`
COPY=../templates/COPYRIGHT.h
COPY=../tools/templates/COPYRIGHT.h
cat $COPY | sed "s/%YEAR%/$YEAR/g" > $NAME/main.cpp
cat >> $NAME/main.cpp <<EOF
#include <${NAME}.h>
Expand Down
25 changes: 25 additions & 0 deletions config.tests/uchardet/main.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
/******************************************************************************
QtAV: Media play library based on Qt and FFmpeg
Copyright (C) 2015 Wang Bin <wbsecg1@gmail.com>
* This file is part of QtAV
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
******************************************************************************/
#include <uchardet.h>

// Probe used by the uchardet configure test: it references uchardet_new()
// so that building this file requires the uchardet declarations to be
// available. The handle returned by the call is not kept.
void test()
{
    (void)uchardet_new();
}
5 changes: 5 additions & 0 deletions config.tests/uchardet/uchardet.pro
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# qmake project for the uchardet configure test.
# Shared settings for the config tests come from paths.pri.
include(../paths.pri)

# Build main.cpp into uchardet_test and link it against the uchardet library.
SOURCES += main.cpp
TARGET = uchardet_test
LIBS += -luchardet
1 change: 0 additions & 1 deletion contrib/libchardet
Submodule libchardet deleted from 38c9fb
1 change: 1 addition & 0 deletions contrib/uchardet
Submodule uchardet added at 84e292
53 changes: 53 additions & 0 deletions contrib/uchardet.pri
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# Adds the uchardet sources found in uchardet/src (relative to this file) to
# the project that include()s this .pri, so the detector is compiled directly
# into that project.
#
# HEADERS and SOURCES are extended with '+=' rather than assigned with '=':
# this file is meant to be included from another project, and a plain
# assignment would discard any files that project had already listed.
UCHARDET_SRC = $$PWD/uchardet/src
INCLUDEPATH += $$UCHARDET_SRC

HEADERS += \
    $$UCHARDET_SRC/uchardet.h \
    $$UCHARDET_SRC/nsMBCSGroupProber.h \
    $$UCHARDET_SRC/nsGB2312Prober.h \
    $$UCHARDET_SRC/nsUTF8Prober.h \
    $$UCHARDET_SRC/nsBig5Prober.h \
    $$UCHARDET_SRC/nsEUCTWProber.h \
    $$UCHARDET_SRC/nsCharSetProber.h \
    $$UCHARDET_SRC/nsSBCSGroupProber.h \
    $$UCHARDET_SRC/nsCodingStateMachine.h \
    $$UCHARDET_SRC/nsEUCKRProber.h \
    $$UCHARDET_SRC/nsUniversalDetector.h \
    $$UCHARDET_SRC/nsSJISProber.h \
    $$UCHARDET_SRC/nsLatin1Prober.h \
    $$UCHARDET_SRC/nsSBCharSetProber.h \
    $$UCHARDET_SRC/nscore.h \
    $$UCHARDET_SRC/CharDistribution.h \
    $$UCHARDET_SRC/nsPkgInt.h \
    $$UCHARDET_SRC/nsEscCharsetProber.h \
    $$UCHARDET_SRC/nsHebrewProber.h \
    $$UCHARDET_SRC/prmem.h \
    $$UCHARDET_SRC/JpCntx.h \
    $$UCHARDET_SRC/nsEUCJPProber.h

SOURCES += \
    $$UCHARDET_SRC/uchardet.cpp \
    $$UCHARDET_SRC/nsEUCTWProber.cpp \
    $$UCHARDET_SRC/LangCyrillicModel.cpp \
    $$UCHARDET_SRC/nsSBCharSetProber.cpp \
    $$UCHARDET_SRC/CharDistribution.cpp \
    $$UCHARDET_SRC/nsLatin1Prober.cpp \
    $$UCHARDET_SRC/nsEscSM.cpp \
    $$UCHARDET_SRC/nsUTF8Prober.cpp \
    $$UCHARDET_SRC/nsBig5Prober.cpp \
    $$UCHARDET_SRC/LangBulgarianModel.cpp \
    $$UCHARDET_SRC/nsGB2312Prober.cpp \
    $$UCHARDET_SRC/nsCharSetProber.cpp \
    $$UCHARDET_SRC/nsEscCharsetProber.cpp \
    $$UCHARDET_SRC/nsEUCKRProber.cpp \
    $$UCHARDET_SRC/nsEUCJPProber.cpp \
    $$UCHARDET_SRC/nsSJISProber.cpp \
    $$UCHARDET_SRC/nsMBCSSM.cpp \
    $$UCHARDET_SRC/nsHebrewProber.cpp \
    $$UCHARDET_SRC/nsUniversalDetector.cpp \
    $$UCHARDET_SRC/nsSBCSGroupProber.cpp \
    $$UCHARDET_SRC/nsMBCSGroupProber.cpp \
    $$UCHARDET_SRC/LangThaiModel.cpp \
    $$UCHARDET_SRC/LangHebrewModel.cpp \
    $$UCHARDET_SRC/JpCntx.cpp \
    $$UCHARDET_SRC/LangHungarianModel.cpp \
    $$UCHARDET_SRC/LangGreekModel.cpp
1 change: 1 addition & 0 deletions debian/control
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ Build-Depends: debhelper (>= 9),
libxv-dev,
libva-dev,
libegl1-mesa-dev,
libuchardet-dev,
Standards-Version: 3.9.5
Homepage: http://www.qtav.org

Expand Down
11 changes: 6 additions & 5 deletions src/libQtAV.pro
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,12 @@ sse2|config_sse2|contains(TARGET_ARCH_SUB, sse2): CONFIG *= sse2 config_simd
PROJECTROOT = $$PWD/..
!include(libQtAV.pri): error("could not find libQtAV.pri")
preparePaths($$OUT_PWD/../out)
!no_libchardet:exists($$PROJECTROOT/contrib/libchardet/libchardet.pri) {
include($$PROJECTROOT/contrib/libchardet/libchardet.pri)
DEFINES += QTAV_HAVE_CHARDET=1 BUILD_CHARDET_STATIC
} else {
warning("contrib/libchardet is missing. run 'git submodule update --init' first")
config_uchardet {
DEFINES += LINK_UCHARDET
LIBS *= -luchardet
} else:exists($$PROJECTROOT/contrib/uchardet/src/uchardet.h) {
include($$PROJECTROOT/contrib/uchardet.pri)
DEFINES += BUILD_UCHARDET
}
exists($$PROJECTROOT/contrib/capi/capi.pri) {
include($$PROJECTROOT/contrib/capi/capi.pri)
Expand Down
131 changes: 26 additions & 105 deletions src/subtitle/CharsetDetector.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/******************************************************************************
QtAV: Media play library based on Qt and FFmpeg
Copyright (C) 2014 Wang Bin <[email protected]>
Copyright (C) 2014-2015 Wang Bin <[email protected]>
* This file is part of QtAV
Expand All @@ -18,130 +18,51 @@
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
******************************************************************************/

#include "CharsetDetector.h"
#include "QtAV/QtAV_Global.h"
#include <cassert>
#include <QtCore/QLibrary>
#include "utils/Logger.h"

#if QTAV_HAVE(CHARDET)
#include <chardet.h>
class CharDet {
public:
bool isLoaded() { return true;}
};
#ifdef LINK_UCHARDET
#include <uchardet/uchardet.h>
#define HAVE_UCHARDET
#else
//from https://github.com/cnangel/libchardet, chardet.h
#define CHARDET_RESULT_OK 0
#define CHARDET_RESULT_NOMEMORY (-1)
#define CHARDET_RESULT_INVALID_DETECTOR (-2)

#define CHARDET_MAX_ENCODING_NAME 64

typedef void* chardet_t;

class dll_helper {
public:
dll_helper(const QString& soname) {
m_lib.setFileName(soname);
if (m_lib.load())
qDebug("%s loaded", m_lib.fileName().toUtf8().constData());
else
qDebug("can not load %s: %s", m_lib.fileName().toUtf8().constData(), m_lib.errorString().toUtf8().constData());

}
virtual ~dll_helper() { m_lib.unload();}
bool isLoaded() const { return m_lib.isLoaded(); }
void* resolve(const char *symbol) { return (void*)m_lib.resolve(symbol);}
private:
QLibrary m_lib;
};

class CharDet : public dll_helper {
public:
typedef int chardet_create_t(chardet_t* pdet);
typedef void chardet_destroy_t(chardet_t det);
typedef int chardet_handle_data_t(chardet_t det, const char* data, unsigned int len);
typedef int chardet_data_end_t(chardet_t det);
typedef int chardet_reset_t(chardet_t det);
typedef int chardet_get_charset_t(chardet_t det, char* namebuf, unsigned int buflen);
#ifdef BUILD_UCHARDET
#include "uchardet.h"
#define HAVE_UCHARDET
#endif
#endif //LINK_UCHARDET

CharDet() : dll_helper("chardet") {
fp_chardet_create = (chardet_create_t*)resolve("chardet_create");
fp_chardet_destroy = (chardet_destroy_t*)resolve("chardet_destroy");
fp_chardet_handle_data = (chardet_handle_data_t*)resolve("chardet_handle_data");
fp_chardet_data_end = (chardet_data_end_t*)resolve("chardet_data_end");
fp_chardet_reset = (chardet_reset_t*)resolve("chardet_reset");
fp_chardet_get_charset = (chardet_get_charset_t*)resolve("chardet_get_charset");
}
int chardet_create(chardet_t* pdet) {
assert(fp_chardet_create);
return fp_chardet_create(pdet);
}
void chardet_destroy(chardet_t det) {
assert(fp_chardet_destroy);
fp_chardet_destroy(det);
}
int chardet_handle_data(chardet_t det, const char* data, unsigned int len) {
assert(fp_chardet_handle_data);
return fp_chardet_handle_data(det, data, len);
}
int chardet_data_end(chardet_t det) {
assert(fp_chardet_data_end);
return fp_chardet_data_end(det);
}
int chardet_reset(chardet_t det) {
assert(fp_chardet_reset);
return fp_chardet_reset(det);
}
int chardet_get_charset(chardet_t det, char* namebuf, unsigned int buflen) {
assert(fp_chardet_get_charset);
return fp_chardet_get_charset(det, namebuf, buflen);
}
private:
chardet_create_t* fp_chardet_create;
chardet_destroy_t* fp_chardet_destroy;
chardet_handle_data_t* fp_chardet_handle_data;
chardet_data_end_t* fp_chardet_data_end;
chardet_reset_t* fp_chardet_reset;
chardet_get_charset_t* fp_chardet_get_charset;
};
#endif //QTAV_HAVE(CHARDET)
class CharsetDetector::Private : public CharDet
class CharsetDetector::Private
{
public:
Private()
: CharDet()
, m_det(NULL)
: m_det(NULL)
{
if (!isLoaded())
return;
int ret = chardet_create(&m_det);
if (ret != CHARDET_RESULT_OK)
m_det = NULL;
#ifdef HAVE_UCHARDET
m_det = uchardet_new();
#endif
}
~Private() {
if (!m_det)
return;
chardet_destroy(m_det);
#ifdef HAVE_UCHARDET
uchardet_delete(m_det);
#endif
m_det = NULL;
}

QByteArray detect(const QByteArray& data) {
#ifdef HAVE_UCHARDET
if (!m_det)
return QByteArray();
int ret = chardet_handle_data(m_det, data.constData(), data.size());
if (ret != CHARDET_RESULT_OK)
if (uchardet_handle_data(m_det, data.constData(), data.size()) != 0)
return QByteArray();
chardet_data_end(m_det);
QByteArray cs(256, ' ');
chardet_get_charset(m_det, cs.data(), cs.size());
chardet_reset(m_det);
uchardet_data_end(m_det);
QByteArray cs(uchardet_get_charset(m_det));
uchardet_reset(m_det);
return cs.trimmed();
#else
return QByteArray();
#endif
}

chardet_t m_det;
uchardet_t m_det;
};

CharsetDetector::CharsetDetector()
Expand Down
8 changes: 4 additions & 4 deletions src/subtitle/CharsetDetector.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/******************************************************************************
QtAV: Media play library based on Qt and FFmpeg
Copyright (C) 2014 Wang Bin <[email protected]>
Copyright (C) 2014-2015 Wang Bin <[email protected]>
* This file is part of QtAV
Expand All @@ -19,8 +19,8 @@
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
******************************************************************************/

#ifndef QTAV_CHARSETDETECTOR_H
#define QTAV_CHARSETDETECTOR_H
#ifndef QTAV_UCHARDET_H
#define QTAV_UCHARDET_H

#include <QtCore/QByteArray>

Expand All @@ -41,4 +41,4 @@ class CharsetDetector
Private *priv;
};

#endif // QTAV_CHARSETDETECTOR_H
#endif // QTAV_UCHARDET_H

0 comments on commit 1439d55

Please sign in to comment.