diff --text --unified --recursive --new-file DBD-ODBC-1.13\dbdimp.c DBD-ODBC-1.13-Unicode\dbdimp.c --- DBD-ODBC-1.13\dbdimp.c Fri Nov 05 04:19:36 2004 +++ DBD-ODBC-1.13-Unicode\dbdimp.c Mon Aug 15 10:47:38 2005 @@ -1537,6 +1537,7 @@ case SQL_REAL: return "REAL"; case SQL_DOUBLE: return "DOUBLE"; case SQL_VARCHAR: return "VARCHAR"; + case SQL_WCHAR: return "UNICODE CHAR"; #ifdef SQL_WVARCHAR case SQL_WVARCHAR: return "UNICODE VARCHAR"; /* added for SQLServer 7 ntext type 2/24/2000 */ #endif @@ -1568,6 +1569,7 @@ #define s_c(x) case x: return #x switch(sqltype) { s_c(SQL_C_CHAR); + s_c(SQL_C_WCHAR); s_c(SQL_C_BIT); s_c(SQL_C_STINYINT); s_c(SQL_C_UTINYINT); @@ -1728,7 +1730,7 @@ /* XXX we should at least allow an attribute to set this */ fbh->ColDisplaySize = 2001; /* XXX! */ #endif - + #ifdef SQL_COLUMN_LENGTH rc = SQLColAttributes(imp_sth->hstmt,i+1,SQL_COLUMN_LENGTH, NULL, 0, NULL ,&fbh->ColLength); @@ -1736,7 +1738,7 @@ dbd_error(h, rc, "describe/SQLColAttributes/SQL_COLUMN_LENGTH"); break; } - + fbh->ColLength += 1; /* add terminator */ #else /* XXX we should at least allow an attribute to set this */ fbh->ColLength = 2001; /* XXX! */ @@ -1760,12 +1762,22 @@ case SQL_BINARY: fbh->ftype = SQL_C_BINARY; break; + case SQL_WCHAR: + case SQL_WVARCHAR: + fbh->ftype = SQL_C_WCHAR; + fbh->ColDisplaySize*=sizeof(WCHAR); // MS SQL returns bytes, Oracle returns characters ... + fbh->ColLength*=sizeof(WCHAR); // MS SQL returns bytes, Oracle returns characters ... + break; case SQL_LONGVARBINARY: fbh->ftype = SQL_C_BINARY; fbh->ColDisplaySize = DBIc_LongReadLen(imp_sth); break; #ifdef SQL_WLONGVARCHAR case SQL_WLONGVARCHAR: /* added for SQLServer 7 ntext type */ + fbh->ftype = SQL_C_WCHAR; + fbh->ColLength*=sizeof(WCHAR); // MS SQL returns bytes, Oracle returns characters ... + fbh->ColDisplaySize = DBIc_LongReadLen(imp_sth)+1; + break; #endif case SQL_LONGVARCHAR: fbh->ColDisplaySize = DBIc_LongReadLen(imp_sth)+1; @@ -2147,7 +2159,7 @@ if (DBIc_NUM_FIELDS(imp_sth) > 0) { DBIc_ACTIVE_on(imp_sth); /* only set for select (?) */ if (ODBC_TRACE_LEVEL(imp_sth) > 0) { - PerlIO_printf(DBIc_LOGPIO(imp_sth), "dbd_describe failed, dbd_st_execute #2...!\n"); + PerlIO_printf(DBIc_LOGPIO(imp_sth), "dbd_execute: have %i fields\n",DBIc_NUM_FIELDS(imp_sth)); } } else { @@ -2379,6 +2391,15 @@ sv_setpv(sv, cvbuf); break; #endif + case SQL_C_WCHAR: + if (ChopBlanks && fbh->ColSqlType == SQL_WCHAR && fbh->datalen > 0) { + WCHAR *p = (WCHAR*)fbh->data; + while(fbh->datalen && p[fbh->datalen-1]==L' ') { + --fbh->datalen; + } + } + sv_setwvn(sv,(WCHAR*)fbh->data,fbh->datalen/sizeof(WCHAR)); + break; default: if (ChopBlanks && fbh->ColSqlType == SQL_CHAR && fbh->datalen > 0) { char *p = (char*)fbh->data; @@ -2564,6 +2585,22 @@ } } + /* for Unicode string types, correct ftype */ + switch (phs->sql_type) { + case SQL_WCHAR: + case SQL_WVARCHAR: + case SQL_WLONGVARCHAR: + phs->ftype=SQL_C_WCHAR; + if (ODBC_TRACE_LEVEL(imp_sth)>=8) { + PerlIO_printf(DBIc_LOGPIO(imp_dbh),"_dbd_get_param_type: modified ftype to SQL_C_WCHAR\n"); + } + break; + default: + if (ODBC_TRACE_LEVEL(imp_sth)>=8) { + PerlIO_printf(DBIc_LOGPIO(imp_dbh),"_dbd_get_param_type: keeping ftype\n"); + } + break; + } } /* ==================================================================== */ @@ -2605,7 +2642,7 @@ /* pre-upgrade high to reduce risk of SvPVX realloc/move */ (void)SvUPGRADE(phs->sv, SVt_PVNV); /* ensure room for result, 28 is magic number (see sv_2pv) */ - SvGROW(phs->sv, (phs->maxlen < 28) ? 28 : phs->maxlen+1); + SvGROW(phs->sv, (phs->maxlen+sizeof(WCHAR) < 28) ? 28 : phs->maxlen+sizeof(WCHAR)); } else { /* phs->sv is copy of real variable, upgrade to at least string */ @@ -2624,16 +2661,6 @@ phs->sv_buf = SvPVX(phs->sv); value_len = 0; } - /* value_len has current value length now */ - phs->sv_type = SvTYPE(phs->sv); /* part of mutation check */ - phs->maxlen = SvLEN(phs->sv)-1; /* avail buffer space */ - - if (ODBC_TRACE_LEVEL(imp_sth) >= 3) { - PerlIO_printf(DBIc_LOGPIO(imp_dbh), "bind %s <== '%.100s' (len %ld/%ld, null %d)\n", - phs->name, SvOK(phs->sv) ? phs->sv_buf : "(null)", - (long)value_len,(long)phs->maxlen, SvOK(phs->sv)?0:1); - PerlIO_flush(DBIc_LOGPIO(imp_dbh)); - } /* ---------------------------------------------------------------- */ @@ -2649,6 +2676,33 @@ _dbd_get_param_type(sth, imp_sth, phs); + if (phs->ftype==SQL_C_WCHAR) { + if (ODBC_TRACE_LEVEL(imp_sth)>=8) { + PerlIO_printf(DBIc_LOGPIO(imp_dbh),"Need to modify phs->sv in place\n"); + } + if (ODBC_TRACE_LEVEL(imp_sth)>=8) { + PerlIO_printf(DBIc_LOGPIO(imp_dbh),"Need to modify phs->sv in place: old length = %i\n",value_len); + } + SV_toWCHAR(phs->sv); /* may modify SvPV(phs->sv), ... */ + phs->sv_buf=SvPV(phs->sv,value_len); /* ... so phs->sv_buf must be updated */ + if (ODBC_TRACE_LEVEL(imp_sth)>=8) { + PerlIO_printf(DBIc_LOGPIO(imp_dbh),"Need to modify phs->sv in place: new length = %i\n",value_len); + } + } + + /* value_len has current value length now */ + phs->sv_type = SvTYPE(phs->sv); /* part of mutation check */ + phs->maxlen = SvLEN(phs->sv)-sizeof(WCHAR); /* avail buffer space */ + + if (ODBC_TRACE_LEVEL(imp_sth) >= 3) { + PerlIO_printf(DBIc_LOGPIO(imp_dbh), "bind %s <== '%.100s' (len %ld/%ld, null %d)\n", + phs->name, SvOK(phs->sv) ? phs->sv_buf : "(null)", + (long)value_len,(long)phs->maxlen, SvOK(phs->sv)?0:1); + PerlIO_flush(DBIc_LOGPIO(imp_dbh)); + } + + + /* * JLU: was SQL_PARAM_OUTPUT only, but that caused a problem with * Oracle's drivers and in/out parameters. Can't be output only @@ -2807,6 +2861,11 @@ if (fCType == SQL_C_CHAR) { PerlIO_printf(DBIc_LOGPIO(imp_dbh), " Param value = %s\n", rgbValue); } + if (fCType==SQL_C_WCHAR) { + char * c1=PVallocW((WCHAR *)rgbValue); + PerlIO_printf(DBIc_LOGPIO(imp_dbh), " Param value = L'%s'\n", c1); + PVfreeW(c1); + } } rc = SQLBindParameter(imp_sth->hstmt, diff --text --unified --recursive --new-file DBD-ODBC-1.13\Makefile.PL DBD-ODBC-1.13-Unicode\Makefile.PL --- DBD-ODBC-1.13\Makefile.PL Fri Oct 29 15:44:12 2004 +++ DBD-ODBC-1.13-Unicode\Makefile.PL Fri Aug 12 12:57:58 2005 @@ -5,7 +5,7 @@ # You may distribute under the terms of either the GNU General Public # License or the Artistic License, as specified in the Perl README file. # -BEGIN { require 5.004 } # 5.004 is required for Win32 +BEGIN { require 5.008_001 } # 5.004 is required for Win32, 5.8.1 is required for proper Unicode support use Config; use ExtUtils::MakeMaker 5.16, qw(&WriteMakefile $Verbose); use File::Basename; diff --text --unified --recursive --new-file DBD-ODBC-1.13\MANIFEST DBD-ODBC-1.13-Unicode\MANIFEST --- DBD-ODBC-1.13\MANIFEST Mon May 31 15:50:34 2004 +++ DBD-ODBC-1.13-Unicode\MANIFEST Fri Aug 12 14:07:10 2005 @@ -7,6 +7,7 @@ README.informix README.RH9 README.hpux +README.unicode-patch ODBC.h ODBC.pm ODBC.xs @@ -15,6 +16,8 @@ dbivport.h fixup_c.h fixup_t.h +unicode_helper.c +unicode_helper.h t/01base.t t/02simple.t t/03dbatt.t @@ -24,7 +27,10 @@ t/09multi.t t/20SqlServer.t t/30Oracle.t +t/40UnicodeRoundTrip.t +t/41Unicode.t t/ODBCTEST.pm +t/UChelp.pm mytest/contest.pl mytest/coltest.pl mytest/joetest.pl diff --text --unified --recursive --new-file DBD-ODBC-1.13\ODBC.h DBD-ODBC-1.13-Unicode\ODBC.h --- DBD-ODBC-1.13\ODBC.h Sat Mar 06 18:53:58 2004 +++ DBD-ODBC-1.13-Unicode\ODBC.h Wed Aug 10 12:15:18 2005 @@ -18,6 +18,8 @@ #include /* from DBI. Load this after dbdodbc.h */ +#include "unicode_helper.h" + SV *odbc_get_info _((SV *dbh, int ftype)); int odbc_get_type_info _((SV *dbh, SV *sth, int ftype)); SV *odbc_col_attributes _((SV *sth, int colno, int desctype)); diff --text --unified --recursive --new-file DBD-ODBC-1.13\README.unicode-patch DBD-ODBC-1.13-Unicode\README.unicode-patch --- DBD-ODBC-1.13\README.unicode-patch Thu Jan 01 01:00:00 1970 +++ DBD-ODBC-1.13-Unicode\README.unicode-patch Sat Mar 04 16:24:22 2006 @@ -0,0 +1,233 @@ +=for humans +This file is written in the POD format, use pod2html or similar to view it. + +=head1 Unicode patch for DBD::ODBC 1.13 + +Date: 2006-03-04 + +=head2 Goals + +=over 4 + +=item * Allow an application on Win32 using DBD::ODBC to store and fetch Unicode +strings in a MS SQL Server 2000 database. + +=item * Minimal changes to the existing driver code. + +=item * Don't break working code. + +=item * Make the DBD::ODBC "do the right thing" with the UTF8 flag. + +=item * A first step towards proper Unicode support for DBD:ODBC. + +=back + + +=head2 Legal disclaimer + +The Unicode patch for DBD::ODBC was initially written by Alexander Foken +(alexander at foken dot de). I wrote this patch during my working hours for a +project that needs Unicode support in DBD::ODBC on Windows, and I have the +permission of my former employer to publish this patch. + +You may distribute under the terms of either the GNU General Public License or +the Artistic License, as specified in the Perl README file. + +=head2 Applying the Patch + +=over 4 + +=item 1. get DBD::ODBC 1.13 from CPAN + +=item 2. unpack it + +=item 3. C DBD-ODBC-1.13-Unicode.patch> + +=item 4. build DBD::ODBC as usual (C, C, C, +C) + +=back + +=head2 Tested Databases + +=over 4 + +=item Microsoft SQL Server 2000 SP3 + +Works, using the ODBC driver from the MS SQL 2000 CDROM. + +You need to install SP1, SP2, and SP3 on both client and server, because +DBD::ODBC needs a recent set of MDAC libraries. + +=item Oracle 9.2 + +Works, using the ODBC driver from the Oracle 9.2 CDROM. + +You have to set the environment variables C and +C (or any other language setting ending with +"C<.AL32UTF8>") before loading the patched DBD::ODBC to make Oracle return +Unicode data. (See also "Oracle and Unicode" in the POD of DBD::Oracle.) + +And you have to enable the "C" Workaround in the Oracle +ODBC Driver Configuration to make Oracle return Unicode to a non-Unicode +application. Alternatively, you can include "C" in your connect string. + +Better try to use DBD::Oracle to access Oracle with less overhead and better +support for Oracle-specific stuff. + +=item Oracle 8.x + +Not tested but known not to support Unicode very well. + +Quoting the DBD::Oracle man page: "I [...] I" + +=item MS Access 2000 + +Unicode tests fail because MS Access 2000 seems not to give any hints about +Unicode, so all data is treated as non-Unicode. You do not want to use this +combination. + +You may want to try the MSDE, it has the SQL Server engine, but with a lower +connection limit and without GUI tools. There are several 3rd party GUIs for the +MSDE. + +=item PostgreSQL 8.0.3 + +Some tests from the original DBD::ODBC 1.13 fail with PostgreSQL 8.0.3, so you +may not want to use DBD::ODBC to connect to PostgreSQL 8.0.3. + +Unicode tests fail because PostgreSQL seems not to give any hints about Unicode, +so all data is treated as non-Unicode. + +Better try to use DBD::Pg to access PostgreSQL with less overhead and better +support for PostgreSQL-specific stuff. DBD::Pg has a driver attribute named +C, set it to 1 and you have proper Unicode support. + +=back + +=head2 Tested Operating Systems and ODBC Managers + +=over 4 + +=item MS Windows 2000 Professional and Server, using the standard ODBC Manager +from Microsoft. + +=back + +(Yes, this list should be longer.) + +=head2 Known Problems + +Perl 5.8.1 or newer is required. Older Perl before 5.8.0 lacked proper Unicode +support. Perl 5.8.0 lacks some auxillary functions for Unicode. + +Unicode is supported only for SQL statement parameters and data returned by the +fetch methods, SQL statements are still treated as native encoding. If you need +a unicode constant in an SQL statement, you have to pass it as parameter or use +SQL functions to convert your constant from native encoding to Unicode. + +All data passed to the patched DBD::ODBC for C, C, +C, and C is treated as Unicode, even if it is +not Unicode. F should check the UTF8 flag of the scalar and +pass a value different from C as first argument to +C. The problem is to know what encoding is used for the +data in the scalar. + +Binding of unicode output parameters is untested (I don't need them) and likely +to fail. + +The patched DBD::ODBC may fail to compile on non-Win32 platforms. It needs a +header file named F defining at least the following: + +=over 4 + +=item A C data type capable of storing a single Unicode character. + +Microsoft uses C in F, and C in F. + +=item C + +C for wide characters. Microsoft declares this function in both +F and F. + +=item C + +C for wide characters, returns character count, not bytes. Microsoft +declares this function in both F and F. + +=item WideCharToMultiByte() and MultiByteToWideChar() functions + +Encoding converter functions. WideChar in this context means the native Unicode +representation of the ODBC API (UCS-2 or UTF-16LE for Windows), MultiByte in +this context means Perls native Unicode representation (UTF-8). Microsoft +declares the two functions in F. The C argument tells the +function that the MultiByte string is in UTF-8 encoding. + +=back + +=head2 Technical + +This patch adds support for C, C, C, and +C. Strings written to columns that are reported as +C, C, or C are automatically +converted to 16 bit Unicode using the Windows API function +C, return values reported as C, +C, C, or C are converted back to 8 +bit Unicode (UTF-8) using the Windows API function C and +have Perl's UTF8 flag set except for empty strings. + +=head2 Tests + +This patch adds two new tests, F and F. +Test 40 checks that Unicode strings can be entered as bound parameters and are +returned unmodified. Test 41 creates a table, writes and reads unicode data +using various bind variants. + +When using Oracle, the empty string test in F is skipped +because Oracle converts empty strings to NULL in this situation. + +I had to add C, C, C to +F, because Oracle in the setup described above returns Unicode +more often than expected. + +I added F, that exports two utility functions for unicode string +tests: + +=over 4 + +=item C + +Dumps a string, indicating its Unicode flag, length and all characters in ASCII +notation. + +=item C + +Compares two strings that may be contain Unicode, and calls C or +C. + +=back + +=head2 See also + +=over 4 + +=item * Microsoft ODBC documentation + +=item * Microsoft API documentation + +=item * http://www.unicode.org/ + +=item * DBI + +=item * DBD::ODBC + +=item * DBD::Oracle + +=item * DBD::Pg + +=back + diff --text --unified --recursive --new-file DBD-ODBC-1.13\t\40UnicodeRoundTrip.t DBD-ODBC-1.13-Unicode\t\40UnicodeRoundTrip.t --- DBD-ODBC-1.13\t\40UnicodeRoundTrip.t Thu Jan 01 01:00:00 1970 +++ DBD-ODBC-1.13-Unicode\t\40UnicodeRoundTrip.t Fri Aug 12 11:55:56 2005 @@ -0,0 +1,108 @@ +#!/usr/bin/perl -w -I./t +# based on *Id: 20SqlServer.t 568 2004-11-08 15:12:37Z jurl * + +use strict; +use warnings; +use UChelp; + +use Test::More; +use DBI qw(:sql_types); + +$|=1; + +my $WAIT=0; +my @data; +my $tests; +# to help ActiveState's build process along by behaving (somewhat) if a dsn is not provided +BEGIN { + @data=( + "hello ASCII: the quick brown fox jumps over the yellow dog", + "Hello Unicode: german umlauts (\x{00C4}\x{00D6}\x{00DC}\x{00E4}\x{00F6}\x{00FC}\x{00DF}) smile (\x{263A}) hebrew shalom (\x{05E9}\x{05DC}\x{05D5}\x{05DD})", + ); + push @data,map { "again $_" } @data; + utf8::is_utf8($data[0]) and die "Perl set UTF8 flag on non-unicode string constant"; + utf8::is_utf8($data[1]) or die "Perl did not set UTF8 flag on unicode string constant"; + utf8::is_utf8($data[2]) and die "Perl set UTF8 flag on non-unicode string constant"; + utf8::is_utf8($data[3]) or die "Perl did not set UTF8 flag on unicode string constant"; + unshift @data,''; + push @data,42; + + my @plaindata=grep { !utf8::is_utf8($_) } @data; + @plaindata or die "OOPS"; + + $tests=2+6*@data+6*@plaindata; + + if ($] < 5.008001) { + plan skip_all => "Old Perl lacking unicode support"; + } elsif (!defined $ENV{DBI_DSN}) { + plan skip_all => "DBI_DSN is undefined"; + } else { + plan tests => $tests, + } +} + +my $dbh=DBI->connect(); +ok(defined($dbh),"DBI connect"); + +my $dbname=$dbh->get_info(17); # DBI::SQL_DBMS_NAME +SKIP: { + my ($len,$fromdual,$skipempty); + if ($dbname=~/Microsoft SQL Server/i) { + ($len,$fromdual,$skipempty)=('LEN','',0); + } elsif ($dbname=~/Oracle/i) { + ($len,$fromdual,$skipempty)=('LENGTH','FROM DUAL',1); + } elsif ($dbname=~/PostgreSQL/i) { + ($len,$fromdual,$skipempty)=('LENGTH','',0); + } elsif ($dbname=~/ACCESS/i) { + ($len,$fromdual,$skipempty)=('LEN','',0); + } else { + skip "Tests not supported using $dbname",$tests-1; + } + + $dbh->{RaiseError} = 1; + $dbh->{'LongTruncOk'}=1; + $dbh->{'LongReadLen'}=32000; + + + foreach my $txt (@data) { + SKIP: { + if ($skipempty and ($txt eq '')) { + skip('Database is known to treat empty strings as NULL in this test',12); + } + unless (utf8::is_utf8($txt)) { + my $sth=$dbh->prepare("SELECT ? as roundtrip, $len(?) as roundtriplen $fromdual"); + ok(defined($sth),"prepare round-trip select statement plaintext"); + + # diag(dumpstr($txt)); + $sth->bind_param (1,$txt,SQL_VARCHAR); + $sth->bind_param (2,$txt,SQL_VARCHAR); + pass("bind VARCHAR"); + $sth->execute(); + pass("execute"); + my ($t,$tlen)=$sth->fetchrow_array(); + pass('fetch'); + cmp_ok($tlen,'==',length($txt),'length equal'); + utf_eq_ok($t,$txt,'text equal'); + } + + my $sth=$dbh->prepare("SELECT ? as roundtrip, $len(?) as roundtriplen $fromdual"); + ok(defined($sth),"prepare round-trip select statement unicode"); + + $sth->bind_param (1,$txt,SQL_WVARCHAR); + $sth->bind_param (2,$txt,SQL_WVARCHAR); + pass("bind WVARCHAR"); + $sth->execute(); + pass("execute"); + my ($t,$tlen)=$sth->fetchrow_array(); + pass('fetch'); + cmp_ok($tlen,'==',length($txt),'length equal'); + utf_eq_ok($t,$txt,'text equal'); + } + } + + $dbh->disconnect; + + pass("all done"); +} + +exit 0; diff --text --unified --recursive --new-file DBD-ODBC-1.13\t\41Unicode.t DBD-ODBC-1.13-Unicode\t\41Unicode.t --- DBD-ODBC-1.13\t\41Unicode.t Thu Jan 01 01:00:00 1970 +++ DBD-ODBC-1.13-Unicode\t\41Unicode.t Fri Aug 12 12:03:22 2005 @@ -0,0 +1,135 @@ +#!/usr/bin/perl -w -I./t +# based on *Id: 20SqlServer.t 568 2004-11-08 15:12:37Z jurl * + +use strict; +use warnings; +use UChelp; + +use Test::More; +use DBI qw(:sql_types); + +$|=1; + +my $WAIT=0; +my @data; +my $tests; +# to help ActiveState's build process along by behaving (somewhat) if a dsn is not provided +BEGIN { + @data=( + "hello ASCII: the quick brown fox jumps over the yellow dog", + "Hello Unicode: german umlauts (\x{00C4}\x{00D6}\x{00DC}\x{00E4}\x{00F6}\x{00FC}\x{00DF}) smile (\x{263A}) hebrew shalom (\x{05E9}\x{05DC}\x{05D5}\x{05DD})", + ); + push @data,map { "again $_" } @data; + utf8::is_utf8($data[0]) and die "Perl set UTF8 flag on non-unicode string constant"; + utf8::is_utf8($data[1]) or die "Perl did not set UTF8 flag on unicode string constant"; + utf8::is_utf8($data[2]) and die "Perl set UTF8 flag on non-unicode string constant"; + utf8::is_utf8($data[3]) or die "Perl did not set UTF8 flag on unicode string constant"; + + $tests=7+12*@data; + + if ($] < 5.008001) { + plan skip_all => "Old Perl lacking unicode support"; + } elsif (!defined $ENV{DBI_DSN}) { + plan skip_all => "DBI_DSN is undefined"; + } else { + plan tests => $tests, + } +} + +my $dbh=DBI->connect(); +ok(defined($dbh),"DBI connect"); + +my $dbname = $dbh->get_info(17); # DBI::SQL_DBMS_NAME +SKIP: { + my ($sth,$NVARCHAR); + if ($dbname=~/Microsoft SQL Server/i) { + ($NVARCHAR)=('NVARCHAR(1000)'); + } elsif ($dbname=~/Oracle/i) { + ($NVARCHAR)=('NVARCHAR2(1000)'); + } elsif ($dbname=~/PostgreSQL/i) { + ($NVARCHAR)=('VARCHAR(1000)'); + } elsif ($dbname=~/ACCESS/i) { + ($NVARCHAR)=('MEMO'); + } else { + skip "Tests not supported using $dbname",$tests-1; + } + + $dbh->{RaiseError} = 1; + $dbh->{'LongTruncOk'}=1; + $dbh->{'LongReadLen'}=32000; + + eval { + local $dbh->{PrintError}=0; + $dbh->do("DROP TABLE PERL_DBD_TABLE1"); + }; + pass("Drop old test table"); + + $dbh->{RaiseError} = 1; + + $dbh->do(<<__SQL__); +CREATE TABLE + PERL_DBD_TABLE1 + ( + i INTEGER PRIMARY KEY, + nva $NVARCHAR, + nvb $NVARCHAR, + nvc $NVARCHAR + ) +__SQL__ + + pass("Create test table"); + + + # Insert records into the database: + $sth=$dbh->prepare("INSERT INTO PERL_DBD_TABLE1 (i,nva,nvb,nvc) values (?,?,?,?)"); + ok(defined($sth),"prepare insert statement"); + for (my $i=0; $i<@data; $i++) { + my ($nva,$nvb,$nvc)=($data[$i]) x 3; + $sth->bind_param (1, $i, SQL_INTEGER); + pass("Bind parameter SQL_INTEGER"); + $sth->bind_param (2, $nva); + pass("Bind parameter default"); + $sth->bind_param (3, $nvb, SQL_WVARCHAR); + pass("Bind parameter SQL_WVARCHAR"); + $sth->bind_param (4, $nvc, SQL_WVARCHAR); + pass("Bind parameter SQL_WVARCHAR"); + $sth->execute(); + pass("execute()"); + } + $sth->finish(); + + # Retrieve records from the database, and see if they match original data: + $sth=$dbh->prepare("SELECT i,nva,nvb,nvc FROM PERL_DBD_TABLE1"); + ok(defined($sth),'prepare select statement'); + $sth->execute(); + pass('execute select statement'); + while (my ($i,$nva,$nvb,$nvc)=$sth->fetchrow_array()) { + my $info=sprintf("(index=%i, Unicode=%s)",$i,utf8::is_utf8($data[$i]) ? 'on' : 'off'); + pass("fetch select statement $info"); + cmp_ok(utf8::is_utf8($nva),'>=',utf8::is_utf8($data[$i]),"utf8 flag $info col1"); + utf_eq_ok($nva,$data[$i],"value matches $info col1"); + + cmp_ok(utf8::is_utf8($nvb),'>=',utf8::is_utf8($data[$i]),"utf8 flag $info col2"); + utf_eq_ok($nva,$data[$i],"value matches $info col2"); + + cmp_ok(utf8::is_utf8($nvc),'>=',utf8::is_utf8($data[$i]),"utf8 flag $info col3"); + utf_eq_ok($nva,$data[$i],"value matches $info col3"); + } + + $WAIT && eval { + print "you may want to look at the table now, the unicode data is damaged!\nHit Enter to continue\n"; + $_=; + + }; + + # eval { + # local $dbh->{RaiseError} = 0; + # $dbh->do("DROP TABLE PERL_DBD_TABLE1"); + # }; + + $dbh->disconnect; + + pass("all done"); +} + +exit 0; diff --text --unified --recursive --new-file DBD-ODBC-1.13\t\ODBCTEST.pm DBD-ODBC-1.13-Unicode\t\ODBCTEST.pm --- DBD-ODBC-1.13\t\ODBCTEST.pm Fri Oct 29 03:19:52 2004 +++ DBD-ODBC-1.13-Unicode\t\ODBCTEST.pm Thu Aug 11 16:45:46 2005 @@ -38,8 +38,8 @@ # ODBC's value is -5. %TestFieldInfo = ( 'COL_A' => [SQL_SMALLINT,-5, SQL_TINYINT, SQL_NUMERIC, SQL_DECIMAL, SQL_FLOAT, SQL_REAL], - 'COL_B' => [SQL_VARCHAR, SQL_CHAR], - 'COL_C' => [SQL_LONGVARCHAR, -1], + 'COL_B' => [SQL_VARCHAR, SQL_CHAR, SQL_WVARCHAR, SQL_WCHAR], + 'COL_C' => [SQL_LONGVARCHAR, -1, SQL_WLONGVARCHAR], 'COL_D' => [SQL_TYPE_TIMESTAMP, SQL_TYPE_DATE, SQL_DATE, SQL_TIMESTAMP ], ); diff --text --unified --recursive --new-file DBD-ODBC-1.13\t\UChelp.pm DBD-ODBC-1.13-Unicode\t\UChelp.pm --- DBD-ODBC-1.13\t\UChelp.pm Thu Jan 01 01:00:00 1970 +++ DBD-ODBC-1.13-Unicode\t\UChelp.pm Thu Aug 11 17:51:02 2005 @@ -0,0 +1,51 @@ +package UChelp; + +use strict; +use warnings; +use base 'Exporter'; +our @EXPORT=qw(dumpstr utf_eq_ok); +use Test::More; + +sub dumpstr($) +{ + my $str=shift; + if (defined $str) { + my ($f,$u)=utf8::is_utf8($str) ? ('\\x{%04X}','utf8') : ('\\x%02X','bytes'); + (my $d=$str)=~s/([^\x20-\x7E])/sprintf($f,ord $1)/gse; + return sprintf("[%s, %i chars] '%s'",$u,length($str),$d); + } else { + return 'undef'; + } +} + +sub utf_eq_ok($$$) +{ + my ($a,$b,$msg)=@_; + + # I want to call Test::More routines in a way that makes this package invisible, + # and shows the failed or passed line of the caller instead. + # So I manipulate @_ and use goto \&func. + + (!defined($a) and !defined($b)) and return pass($msg); + unless (defined($a) and defined($b)) { + diag(defined($a) ? "Expected undef, got '$a'" : "Got undef, expected '$b'"); + @_=($msg); + goto \&fail; + # see below for the reason of goto + } + + if ($a eq $b) { + @_=($msg); + goto \&pass; + } + + if ("\x{2a36}$a" eq "\x{2a36}$b") { # implicit upgrade to UTF8 + @_=($msg); + goto \&pass; + } + + @_=(dumpstr($a),'eq',dumpstr($b),$msg); + goto \&cmp_ok; +} + +1; diff --text --unified --recursive --new-file DBD-ODBC-1.13\unicode_helper.c DBD-ODBC-1.13-Unicode\unicode_helper.c --- DBD-ODBC-1.13\unicode_helper.c Thu Jan 01 01:00:00 1970 +++ DBD-ODBC-1.13-Unicode\unicode_helper.c Mon Aug 15 10:23:24 2005 @@ -0,0 +1,252 @@ +#include "ODBC.h" + +typedef enum { do_new=1, do_cat, do_set } new_cat_set_t; + +/* + * If len>=0, wp is an array of wide characters without a termination character. + * If len==-1, wp is a null-terminated wide string + */ +static SV * _dosvwv(SV * sv, WCHAR * wp, STRLEN len, new_cat_set_t mode) +{ + char * p=NULL; + int bytes; + STRLEN svlen; + + bytes=WideCharToMultiByte(CP_UTF8,0,wp,len,NULL,0,NULL,NULL); + Newz(0,p,1+bytes,char); + if (bytes!=0) { + if(!WideCharToMultiByte(CP_UTF8,0,wp,len,p,bytes,NULL,NULL)) { + int err=GetLastError(); + switch (err) { + case ERROR_INSUFFICIENT_BUFFER: + croak("_dosvwv: WideCharToMultiByte() failed: insufficient buffer"); + case ERROR_INVALID_FLAGS: + croak("_dosvwv: WideCharToMultiByte() failed: invalid flags"); + case ERROR_INVALID_PARAMETER: + croak("_dosvwv: WideCharToMultiByte() failed: invalid parameter"); + default: + croak("_dosvwv: WideCharToMultiByte() failed: error code %i",err); + } + } + } + svlen=(len==-1 ? strlen(p) : bytes); + switch (mode) { + case do_new: + sv=newSVpvn(p,svlen); + break; + case do_cat: + sv_catpvn(sv,p,svlen); + break; + case do_set: + sv_setpvn(sv,p,svlen); + break; + default: + croak("_dosvwv called with bad mode value"); + } + if (*p) { + SvUTF8_on(sv); + } else if (mode!=do_cat) { + SvUTF8_off(sv); /* Don't switch off UTF8 just because we *APPENDED* an empty string! sv may still be UTF8. */ + } + Safefree(p); + return sv; +} + +/* + * If len!=0, wp is an array of wide characters without a termination character. + * If len==0, wp is a null-terminated wide string + * len==-1 is not allowed (but not enforced) + */ + +SV * newSVwv(WCHAR * wp, STRLEN len) +{ + SV * sv; + + if (wp==NULL) { + sv=newSVpv(NULL,len); + } else { + sv=_dosvwv(NULL,wp,(len==0) ? -1 : len,do_new); + } + return sv; +} + +/* + * wp is an array of wide characters without a termination character. + * len==-1 is not allowed (but not enforced) + */ +SV * newSVwvn(WCHAR * wp, STRLEN len) +{ + SV * sv; + + if (wp==NULL) { + sv=newSVpv(NULL,len); + } else { + sv=(len==0) ? newSVpvn("",0) : _dosvwv(NULL,wp,len,do_new); + } + return sv; +} + +/* wp is an array of wide characters without a termination character. */ +void sv_catwvn(SV * sv, WCHAR * wp, STRLEN len) +{ + if (wp==NULL) { + sv_catpvn(sv,NULL,len); + } else if (len==0) { + sv_catpvn(sv,"",0); + } else { + _dosvwv(sv,wp,len,do_cat); + } +} + +/* wp is null-terminated string */ +void sv_catwv(SV * sv, WCHAR * wp) +{ + STRLEN len; + if (wp==NULL) { + sv_catpv(sv,NULL); + } else { + len=wcslen(wp); + if (len==0) { + sv_catpvn(sv,"",0); + } else { + _dosvwv(sv,wp,len,do_cat); + } + } +} + +/* wp is an array of wide characters without a termination character. */ +void sv_setwvn(SV * sv, WCHAR * wp, STRLEN len) +{ + if (wp==NULL) { + sv_setpvn(sv,NULL,len); + } else if (len==0) { + sv_setpvn(sv,"",0); + } else { + _dosvwv(sv,wp,len,do_set); + } +} + + +/* wp is null-terminated string */ +void sv_setwv(SV * sv, WCHAR * wp) +{ + STRLEN len; + if (wp==NULL) { + sv_catpv(sv,NULL); + } else { + len=wcslen(wp); + if (len==0) { + sv_setpvn(sv,"",0); + } else { + _dosvwv(sv,wp,len,do_set); + } + } +} + + +/* + * *bufbytes will contain the number of bytes returned, not including the trailing null bytes + */ +WCHAR * _SvWV(SV * sv, int * bufbytes) +{ + int len; + char * p; + int widechars=0; + WCHAR * buf=NULL; + p=SvPVutf8(sv,len); + /* len is the number of characters in sv, not counting the trailing '\0' */ + if (p!=NULL) { + /* work with a byte array, not a null-terminated string (len!=-1) */ + widechars=MultiByteToWideChar(CP_UTF8,0,p,len,NULL,0); + /* allocate one extra character for a trailing null. Zero out the memory. */ + Newz(0,buf,1+widechars,WCHAR); + MultiByteToWideChar(CP_UTF8,0,p,len,buf,1+widechars); + } + if (bufbytes!=NULL) *bufbytes=widechars*sizeof(WCHAR); + return buf; +} + +/* + * Does not handle byte arrays, only null-terminated strings. + */ + +WCHAR * WValloc(char * s) +{ + WCHAR * buf=NULL; + if (NULL!=s) { + int widechars=MultiByteToWideChar(CP_UTF8,0,s,-1,NULL,0); + Newz(0,buf,widechars+1,WCHAR); + if (widechars!=0) { + MultiByteToWideChar(CP_UTF8,0,s,-1,buf,widechars); + } + } + return buf; +} + +void WVfree(WCHAR * wp) +{ + if (wp!=NULL) Safefree(wp); +} + +/* + * Does not handle byte arrays, only null-terminated strings. + */ + +char * PVallocW(WCHAR * wp) +{ + char * retval=NULL; + if (wp!=NULL) { + int bytes=WideCharToMultiByte(CP_UTF8,0,wp,-1,NULL,0,NULL,NULL); + Newz(0,retval,bytes,char); + if (!WideCharToMultiByte(CP_UTF8,0,wp,-1,retval,bytes,NULL,NULL)) { + croak("WideCharToMultiByte() failed"); + } + } + return retval; +} + +void PVfreeW(char * s) +{ + if (s!=NULL) Safefree(s); +} + +/* + * Mutate an SV's PV INPLACE to contain UTF-16. Does not handle byte arrays, only null-terminated strings. + * Turns the UTF8 flag OFF unconditionally, because SV becomes a byte array (for Perl). + */ +void SV_toWCHAR(SV * sv) +{ + STRLEN len; + WCHAR * wp; + char * p; + if (!SvOK(sv)) { + /* warn("SV_toWCHAR called for undef"); */ + return; + } + p=SvPVutf8_force(sv,len); + /* _force makes sure SV is only a string */ + wp=WValloc(p); + len=wcslen(wp); + p=SvGROW(sv,sizeof(WCHAR)*(1+len)); + wcscpy((WCHAR *)p,wp); + SvCUR_set(sv,sizeof(WCHAR)*len); + WVfree(wp); + SvPOK_only(sv); /* sv is nothing but a non-UTF8 string -- for Perl ;-) */ +} + +/* + * Mutate an SV's PV INPLACE to contain UTF-8. Does not handle WCHAR arrays, only null-terminated strings. + * Turns the UTF8 flag ON if the result is not empty, OFF if the result is empty. + */ +void SV_fromWCHAR(SV * sv) +{ + STRLEN dummy; + char * p; + + if (!SvOK(sv) || !SvPOK(sv)) return; + p=PVallocW((WCHAR *)SvPV_force(sv,dummy)); /* _force makes sure SV is only a string */ + sv_setpv(sv,p); + SvPOK_only(sv); /* sv is nothing but a string */ + if (*p) SvUTF8_on(sv); /* sv is a UTF8 string */ + PVfreeW(p); +} diff --text --unified --recursive --new-file DBD-ODBC-1.13\unicode_helper.h DBD-ODBC-1.13-Unicode\unicode_helper.h --- DBD-ODBC-1.13\unicode_helper.h Thu Jan 01 01:00:00 1970 +++ DBD-ODBC-1.13-Unicode\unicode_helper.h Wed Aug 10 14:37:02 2005 @@ -0,0 +1,77 @@ +#ifndef unicode_helper_h +#define unicode_helper_h + +#include + +/* + * Create a new SV * from a WCHAR * + * SV contains UTF-8 representation of wp, has UTF8-Flag on except for empty string + * The difference between newSVwv() and newSVwvn() is the handling of len==0: + * newSVwvn returns an empty string, newSVwv auto-detects the length of wp + * len is the number of WCHARs, not bytes. + */ +SV * newSVwv(WCHAR * wp, STRLEN len); +SV * newSVwvn(WCHAR * wp, STRLEN len); + +/* + * Get a WCHAR * representation of a char * + * The representation is a converted copy, so the result needs to be freed usng WVfree(). + * char * s == NULL is handled properly + */ +WCHAR * WValloc(char * s); + +/* + * Free a WCHAR * representation of a char * + * Used to free the return values of WValloc(), SvWV(), SvWV_nolen() + * wp == NULL is handled properly + */ +void WVfree(WCHAR * wp); + +/* + * Get a WCHAR * representation of an SV *, similar to SvPV() and SvPV_nolen() + * The representation is a converted copy, so the result needs to be freed using WVfree(). + * len counts BYTES, not characters + */ +#define SvWV_nolen(sv) _SvWV((sv),NULL) +#define SvWV(sv,len) _SvWV((sv),&len) +WCHAR * _SvWV(SV * sv, int * bufbytes); /* PRIVATE, do not call exept via the two macros above */ + +/* + * Set the string value of an SV* to a representation of a WCHAR * value, similar to sv_setpvn() and sv_setpv() + * SV contains UTF-8 representation of wp, has UTF8-Flag on except for empty string + */ +void sv_setwvn(SV * sv, WCHAR * wp, STRLEN len); +void sv_setwv(SV * sv, WCHAR * wp); + +/* Append a TCHAR * to the string value of an SV* , similar to sv_catpvn() and sv_catpv() */ +void sv_catwvn(SV * sv, WCHAR * wp, STRLEN len); +void sv_catwv(SV * sv, WCHAR * wp); + + +/* + * Get a char * representation of a WCHAR * + * The representation is a converted copy, so the result needs to be freed using PVfree(). + * TCHAR * wp == NULL is handled properly + */ +char * PVallocW(WCHAR * wp); + +/* + * Free a WCHAR * representation of a char * + * Used to free the return value of PVallocW() + * char * s == NULL is handled properly + */ +void PVfreeW(char * s); + +/* + * Mutate an SV's PV INPLACE to contain UTF-16 + * Turns the UTF8 flag OFF unconditionally + */ +void SV_toWCHAR(SV * sv); + +/* + * Mutate an SV's PV INPLACE to contain UTF-8 + * Turns the UTF8 flag ON if the result is not empty + */ +void SV_fromWCHAR(SV * sv); + +#endif /* defined unicode_helper_h */