Code Comments
Programming Forum and web based access to our favorite programming groups.
Hi, I compared one element with others in a Hash (size : about 7,000).
------------------------------------------------------------------------
for(word1 in arrword)
{
for(word2 in arrword)
{
# compare word1 with word2.
}
}
------------------------------------------------------------------------
But, in every iteration, the memory-usage increased.
Somebody help me?
Thanks.
Remi.
Post Follow-up to this messageiaminsik <iaminsik@gmail.com> writes:
> Hi, I compared one element with others in a Hash (size : about 7,000).
>
> ------------------------------------------------------------------------
> for(word1 in arrword)
> {
> for(word2 in arrword)
> {
> # compare word1 with word2.
> }
> }
> ------------------------------------------------------------------------
>
> But, in every iteration, the memory-usage increased.
>
> Somebody help me?
I would guess your comparison procedure is causing memory allocation,
but you haven't said what it's doing.
Cheers,
- Joel
Post Follow-up to this message
On 3월18일, 오후12시41분, Joel Reicher <j...@panacea.null.org> wrote:
> iaminsik <iamin...@gmail.com> writes:
>
>
>
>
> I would guess your comparison procedure is causing memory allocation,
> but you haven't said what it's doing.
I experimented word-similarity with predicates.
My source code is very long.... In the sub-routines, I frequently
split a string into an array, and delete the array.
If you have some time, please let me know of any memory bugs...
# Pairwise similarity pass over every word in arrword.
# Only words whose arrwordfreq count is greater than 10 take part.  For each
# ordered pair (word1, word2) the score is
#     2 * (shared-feature information) / (I1 + I2)
# where I1/I2 come from I_of_word() and the shared-feature information is the
# first SUBSEP-separated field returned by Co_I_of_words().  Every non-zero
# score is written to "Dekang_over10.txt".
print "calc similarity";
for (word1 in arrword)
{
	# The word1 frequency test does not depend on word2, so it is checked
	# once here instead of on every inner iteration.  The "in" test keeps a
	# missing word from being created as an empty arrwordfreq element.
	if (!((word1 in arrwordfreq) && arrwordfreq[word1] > 10))
		continue;

	I1 = I_of_word(arrword[word1], cnt_all_noun_type);

	for (word2 in arrword)
	{
		if (word1 == word2)
			continue;
		if (!((word2 in arrwordfreq) && arrwordfreq[word2] > 10))
			continue;

		I2 = I_of_word(arrword[word2], cnt_all_noun_type);
		# Co_I_of_words() reads the global word1 set by the outer loop.
		ret = Co_I_of_words(arrword[word1], cnt_all_noun_type, word2);
		ln = split(ret, arrret, SUBSEP);
		ISame = arrret[1] * 2;

		# A zero denominator would abort awk with "division by zero";
		# such a pair has no usable score and is skipped.
		if (I1 + I2 != 0)
		{
			I = ISame / (I1 + I2);
			if (I)
				print word1 "(" arrwordfreq[word1] ")", I,
				      word2 "(" arrwordfreq[word2] ")",
				      arrret[2], arrret[3] > "Dekang_over10.txt";
		}
		delete arrret;
	}
}
}
# Co-Information Sum between Two Words
#
# Parameters:
#   features      "|"-separated feature list of the first word
#   cnt_all_type  total count handed through to I_of_feature()
#   word2         the second word
#   (names after the extra spaces are local scratch variables, not arguments)
#
# Returns:  info SUBSEP cofeatures1 SUBSEP cofeatures2
#   info         sum of I_of_feature() over the features for which
#                arrwordfeaturerelation[word2 SUBSEP feature] exists and is
#                non-zero
#   cofeatures1  "|"-joined "feature(value)" list using the word1 values
#   cofeatures2  the same list using the word2 values
#
# Globals read: arrwordfeaturerelation, and word1 (set by the caller's loop).
#
# Membership is tested with "in": merely referencing array[key] creates the
# element, which made arrwordfeaturerelation grow on every call.
function Co_I_of_words (features,cnt_all_type,word2,    i,n,feats,info,key1,key2,label,co1,co2) # divided by [|]
{
	co1 = co2 = "";
	info = 0.0;
	n = split(features, feats, "[|]");
	for (i = 1; i <= n; i++)
	{
		# check whether word2 has the same feature.
		key2 = word2 SUBSEP feats[i];
		if (!(key2 in arrwordfeaturerelation) || arrwordfeaturerelation[key2] == 0)
			continue;

		info += I_of_feature(feats[i], cnt_all_type);

		# Features may contain SUBSEP; show it as "/" so the result can
		# still be split on SUBSEP by the caller.
		label = feats[i];
		gsub(SUBSEP, "/", label);

		key1 = word1 SUBSEP feats[i];
		co2 = co2 (co2 == "" ? "" : "|") label \
			"(" arrwordfeaturerelation[key2] ")";
		# Guarded lookup: a missing word1 entry prints as "" exactly as
		# before, but is no longer created as a side effect.
		co1 = co1 (co1 == "" ? "" : "|") label \
			"(" ((key1 in arrwordfeaturerelation) ? arrwordfeaturerelation[key1] : "") ")";
	}
	return info SUBSEP co1 SUBSEP co2;
}
# Information Sum of Word
#
# Parameters:
#   features      "|"-separated feature list of one word
#   cnt_all_type  total count handed through to I_of_feature()
#   (i, n, feats, total are local scratch variables, not arguments)
#
# Returns the sum of I_of_feature(feature, cnt_all_type) over all features;
# 0 when the list is empty.
function I_of_word (features,cnt_all_type,    i,n,feats,total) # divided by [|]
{
	total = 0.0;
	n = split(features, feats, "[|]");
	for (i = 1; i <= n; i++)
		total += I_of_feature(feats[i], cnt_all_type);
	return total;
}
# Information A Feature
#
# Parameters:
#   feature_relation  key into the global arrfeature array, whose value is a
#                     "|"-separated list
#   cnt_all_type      denominator of the ratio
#   (parts, nchild are local scratch variables, not arguments)
#
# Returns -log(nchild / cnt_all_type), where nchild is the number of
# "|"-separated items stored in arrfeature[feature_relation] (0 when the key
# is absent or its value is empty).
function I_of_feature (feature_relation,cnt_all_type,    parts,nchild) # divided by [|]
{
	nchild = 0;
	# "in" avoids creating an empty arrfeature element for unknown features.
	if (feature_relation in arrfeature)
		nchild = split(arrfeature[feature_relation], parts, "[|]");
	return -log(nchild / cnt_all_type);
}
# NonDuplicateString
#
# Parameters:
#   ArrString  "|"-separated list held in a single string
#   newbie     item to add
#   (i, n, items are local scratch variables, not arguments)
#
# Returns ArrString unchanged when newbie is already one of its items,
# otherwise ArrString with newbie appended (preceded by "|" unless the list
# was empty).
function InsertArrayString (ArrString, newbie,    i,n,items)
{
	n = split(ArrString, items, "[|]");
	for (i = 1; i <= n; i++)
		if (items[i] == newbie)
			return ArrString;	# already present, nothing to add
	return ArrString (ArrString == "" ? "" : "|") newbie;
}
# Get Function String
#
# Parameters:
#   String  tokens separated by " + "
#   (i, n, parts, token are local scratch variables, not arguments)
#
# Returns the first token containing "/fjc", with everything up to and
# including its first "/" removed; returns the string "NULL" when no token
# matches.
function CleanStringToFunc (String,    i,n,parts,token)
{
	n = split(String, parts, " [+] ");
	for (i = 1; i <= n; i++)
	{
		if (parts[i] ~ /\/fjc/)
		{
			token = parts[i];
			# The pattern is anchored at the start, so one
			# substitution is all that can ever happen.
			sub(/^[^\/]*\//, "", token);
			return token;
		}
	}
	return "NULL";
}
# CleanString
#
# Parameters:
#   String  tokens separated by " + "
#   (i, n, parts, kept are local scratch variables, not arguments)
#
# Returns the tokens that contain one of the tags /CMC, /YBDO, /YBHO, /fpd or
# /fph, in their original order and re-joined with " + "; returns "" when no
# token qualifies.
function CleanStringToVerb (String,    i,n,parts,kept)
{
	kept = "";
	n = split(String, parts, " [+] ");
	for (i = 1; i <= n; i++)
	{
		if (parts[i] ~ /\/(CMC|YBDO|YBHO|fpd|fph)/)
			kept = (kept == "") ? parts[i] : kept " + " parts[i];
	}
	return kept;
}
Thanks!
Remi.
>
> Cheers,
>
> - Joel- =B5=FB=BF=C2 =C5=D8=BD=BA=C6=AE =BC=FB=B1=E2=B1=E2 -
>
> - =B5=FB=BF=C2 =C5=D8=BD=BA=C6=AE =BA=B8=B1=E2 -
Post Follow-up to this message
On 3/17/2008 11:30 PM, iaminsik wrote:
> On 3=BF=F918=C0=CF, =BF=C0=C8=C412=BD=C341=BA=D0, Joel Reicher <j...@pa=
nacea.null.org> wrote:
>=20
--
--
>=20
>=20
> I experimented word-similarity with predicates.
> My source code is very long.... In the sub-routines, I frequently
> split a tring into an array, and delete the array.
>=20
> If you have some times, let me know memory bugs...
>=20
> print "calc similarity";
> for(word1 in arrword)
> {
> I1 =3D I_of_word(arrword[word1], cnt_all_noun_type);
> for(word2 in arrword)
> {
> if(word1 !=3D word2 && arrwordfreq[word1]>10 &&
> arrwordfreq[word2]>10 )
> #if(word1 !=3D word2 )
> {
> I2 =3D I_of_word(arrword[word2], cnt_all_noun_type);
> ret =3D Co_I_of_words(arrword[word1], cnt_all_noun_type, word2)=
;
> ln=3Dsplit(ret, arrret, SUBSEP);
> ISame =3D arrret[1];
> ISame =3D ISame * 2;
> I =3D ISame / ( I1 + I2);
> if( I )
> print word1 "(" arrwordfreq[word1] ")", I, word2 "("
> arrwordfreq[word2] ")", arrret[2], arrret[3] > "Dekang_over10.txt";
> delete arrret;
> }
> }
> }
> }
>=20
> # Co-Information Sum between Two Words
> function Co_I_of_words (features,cnt_all_type,word2) # divided by [|]
> {
> cofeatures1=3Dcofeatures2=3D"";
> I=3D0.0;
> word1num=3Dsplit(features,arrfeature2,"[|]");
> for(i=3D1;i<=3Dword1num;i++)
> {
> # check whether word2 has the same feature.
> if(arrwordfeaturerelation[word2 SUBSEP arrfeature2[i]] !=3D 0)
> {
> I +=3D I_of_feature(arrfeature2[i], cnt_all_type);
> arrfeaturestr=3Darrfeature2[i];
> gsub(SUBSEP, "/", arrfeaturestr);
> cofeatures2=3Dcofeatures2 (cofeatures2=3D=3D""?"":"|") arrfeature=
str \
> "(" arrwordfeaturerelation[word2 SUBSEP arrfeature2[i]]
> ")";
> cofeatures1=3Dcofeatures1 (cofeatures1=3D=3D""?"":"|") arrfeature=
str \
> "(" arrwordfeaturerelation[word1 SUBSEP arrfeature2[i]]
> ")";
> }
> else
> delete arrwordfeaturerelation[word2 SUBSEP arrfeature2[i]];
> }
> delete arrfeature2;
>=20
> retval=3DI SUBSEP cofeatures1 SUBSEP cofeatures2;
> return retval;
> }
>=20
> # Information Sum of Word
> function I_of_word (features,cnt_all_type) # divided by [|]
> {
> I=3D0.0;
> word1num=3Dsplit(features,arrfeature1,"[|]");
> for(i=3D1;i<=3Dword1num;i++)
> {
> I +=3D I_of_feature(arrfeature1[i], cnt_all_type);
> }
> delete arrfeature1;
>=20
> return I;
> }
>=20
> # Information A Feature
> function I_of_feature (feature_relation,cnt_all_type) # divided by [|]
> {
> lchild=3Dsplit(arrfeature[feature_relati
on],div,"[|]");
> delete div;
> return -log(lchild / cnt_all_type);
> }
>=20
> # NonDuplicateString
> function InsertArrayString (ArrString, newbie)
> {
> n=3Dsplit(ArrString, Container, "[|]");
> bflag=3D0;
> for(i=3D1;i<=3Dn;i++)
> {
> if(Container[i]=3D=3Dnewbie) bflag=3D1;
> }
> delete Container;
> if(bflag=3D=3D0)
> ArrString =3D ArrString (ArrString=3D=3D""?"":"|") newbie;
> return ArrString;
> }
>=20
> # Get Function String
> function CleanStringToFunc (String)
> {
> retstr=3D"";
> n=3Dsplit(String, Arrstr, " [+] ");
> for(i=3D1;i<=3Dn;i++)
> {
> if(Arrstr[i] ~ /\/fjc/)
> {
> tempstr=3DArrstr[i];
> gsub(/^[^\/]*\//,"",tempstr);
> delete Arrstr;
> return tempstr;
> }
> }
> delete Arrstr;
> return "NULL";
> }
>=20
> # CleanString
> function CleanStringToVerb (String)
> {
> retstr=3D"";
> n=3Dsplit(String, Arrstr, " [+] ");
> for(i=3D1;i<=3Dn;i++)
> {
> if(Arrstr[i] ~ /\/CMC/ || Arrstr[i] ~ /\/YBDO/ \
> || Arrstr[i] ~ /\/YBHO/ \
> || Arrstr[i] ~ /\/fpd/ || Arrstr[i] ~ /\/fph/)
> {
> if(retstr=3D=3D"") retstr =3D retstr Arrstr[i];
> else retstr =3D retstr " + " Arrstr[i];
> }
> }
> delete Arrstr;
> return retstr;
> }
>=20
> Thanks!
> Remi.
>=20
>=20
>=20
>=20
It may be possible for some elements of arrwordfeaturerelation[] not to get
deleted since in Co_I_of_words() you only delete them if the condition
"arrwordfeaturerelation[word2 SUBSEP arrfeature2[i]] != 0" is false (though I
don't see where you increment that array element anyway so I assume it's in some
piece of code you didn't post) and you only delete the word2-indexed entries
delete arrwordfeaturerelation[word2 SUBSEP arrfeature2[i]]
while in your code you use both word2 and word1 indices and so create array
entries for both:
cofeatures2=...arrwordfeaturerelation[word2 SUBSEP arrfeature2[i]]
cofeatures1=...arrwordfeaturerelation[word1 SUBSEP arrfeature2[i]]
Regards,
Ed.
Post Follow-up to this messageOn 3=BF=F918=C0=CF, =BF=C0=C8=C48=BD=C343=BA=D0, Ed Morton <mor...@lsupcaemn = t.com> wrote: > On 3/17/2008 11:30 PM, iaminsik wrote: > > > > > acea.null.org> wrote: > > > - - > > > > > > > tr \ tr \ > > > > > > > > > > > > > It may be possible for some elements of arrwordfeaturerelation[] not to ge=[/color ] t > deleted since in Co_I_of_words() you only delete them if the condition > "arrwordfeaturerelation[word2 SUBSEP arrfeature2[i]] !=3D 0" is false (tho= ugh I > don't see where you increment that array element anyway so I assume it's i=[/color ] n some > piece of code you didn't post) and you only delete the word2-indexed entri=[/color ] es > > delete arrwordfeaturerelation[word2 SUBSEP arrfeature2[i]] > In the 'else' control structure where I deleted an element of arrwordfeaturerelation, I tried to check that if I refer an empty element of HASH in the 'if' condition, something(null value) will be created. I mean it is a verification code. > while in your code you use both word2 and word1indices and so create array=[/color ] > entries for both: > > cofeatures2=3D...arrwordfeaturerelation[word2 SUBSEP arrfeature2[i]]= > cofeatures1=3D...arrwordfeaturerelation[word1 SUBSEP arrfeature2[i]]= > In my algorithm, both of 'arrwordfeaturerelation[word1 SUBSEP arrfeature2[i]]' and 'arrwordfeaturerelation[word2 SUBSEP arrfeature2[i]]' cannot generate an empty element in HASH. But, I will check whether or not. > Regards, > > Ed.- =B5=FB=BF=C2 =C5=D8=BD=BA=C6=AE =BC=FB=B1=E2=B1=E2 - > > - =B5=FB=BF=C2 =C5=D8=BD=BA=C6=AE =BA=B8=B1=E2 - Thanks for your regards. Remi.
Post Follow-up to this messageOn 3/18/2008 7:53 AM, iaminsik wrote: > On 3=BF=F918=C0=CF, =BF=C0=C8=C48=BD=C343=BA=D0, Ed Morton <mor...@lsup= caemnt.com> wrote: >=20 anacea.null.org> wrote: ). ---- ---- ); estr \ estr \ get though I s in some tries >=20 >=20 > In the 'else' control structure where I deleted an element of > arrwordfeaturerelation, > I tried to check that if I refer an empty element of HASH in the 'if' > condition, something(null value) will be created. > I mean it is a verification code. > If you want to check for an index being present in an array, use the "in"= operator: if (index in array) ... don't create the element then destroy it: if (!array[index]) delete array[index] >=20 ray i]] i]] >=20 >=20 > In my algorithm, both of 'arrwordfeaturerelation[word1 SUBSEP > arrfeature2[i]]' and 'arrwordfeaturerelation[word2 SUBSEP > arrfeature2[i]]' > cannot generate an empty element in HASH. I don't know what that means. The above code allocates 2 pieces of memory= in the array whether or not that array is being used as a hash table. > But, I will check whether or not. OK. Ed.
Post Follow-up to this messageOn 3=BF=F918=C0=CF, =BF=C0=C8=C410=BD=C311=BA=D0, Ed Morton <mor...@lsupcaem = nt.com> wrote: > On 3/18/2008 7:53 AM, iaminsik wrote: > > > > > aemnt.com> wrote: > > nacea.null.org> wrote: > > . > ---- ---- > > > > > > ; > str \ str \ > > > > > > > > > > > > get hough I in some ries > > > > If you want to check for an index being present in an array, use the "in" =[/color ] operator: > > if (index in array) ... > > don't create the element then destroy it: > > if (!array[index]) > delete array[index] Yes, this is what I want to know since this Feb. If I create an element in 'if' condition and delete it, will the speed of program be heavily slower? If it be slower, what causes the happening? Internal-max-hash-size? or simple 'insert-delete' behavior? > ay > ]] ]] > > > I don't know what that means. The above code allocates 2 pieces of memory =[/color ] in the > array whether or not that array is being used as a hash table. In my flow of program, word1 and word2 shares the same features. It means if 'arrwordfeaturerelation[word2 SUBSEP arrfeature2[i]]' has a real element, (which is guaranteed by the previous 'if' condition) then 'arrwordfeaturerelation[word1 SUBSEP arrfeature2[i]]' also should have a real element. > > > OK. Thanks again for your thoughts. > > Ed.- =B5=FB=BF=C2 =C5=D8=BD=BA=C6=AE =BC=FB=B1=E2=B1=E2 - > > - =B5=FB=BF=C2 =C5=D8=BD=BA=C6=AE =BA=B8=B1=E2 -
Post Follow-up to this message
On 3/18/2008 9:12 AM, iaminsik wrote:
> On 3월18일, 오후10시11분, Ed Morton <mor...@lsupcaemnt.com> wrote:
<snip>
n" operator:
>
>
> Yes, this is what I want to know since this Feb.
> If I create an element in 'if' condition and delete it, will the speed
> of program be heavily slower?
> If it be slower, what causes the happening?
> Internal-max-hash-size? or simple 'insert-delete' behavior?
That's a little like asking "if instead of using sub() I write my own function
to do substitutions, would it be slower and if so why?". The immediate answer is
"it's pointless to write that code since a better alternative is part of the
language so who cares?". Maybe someone who, unlike me, actually does care and
knows the answer will respond....
Ed.
Post Follow-up to this messageOn 3=BF=F918=C0=CF, =BF=C0=C8=C411=BD=C337=BA=D0, Ed Morton <mor...@lsupcaem = nt.com> wrote: > On 3/18/2008 9:12 AM, iaminsik wrote: > > > > > caemnt.com> wrote: > <snip> " operator: > > > > > > That's a little like asking "if instead of using sub() I write my own func=[/color ] tion > to do substitutions, would it be slower and if so why?". The immediate ans=[/color ] wer is > "it's pointless to write that code since a better alternative is part of t=[/color ] he > language so who cares?". Maybe someone who, unlike me, actually does care =[/color ] and > knows the answer will respond.... > > Ed.- =B5=FB=BF=C2 =C5=D8=BD=BA=C6=AE =BC=FB=B1=E2=B1=E2 - > > - =B5=FB=BF=C2 =C5=D8=BD=BA=C6=AE =BA=B8=B1=E2 - Ok. That makes sense. Thanks a lot. Really x 1000000! Have a good day. Best Regards. Remi.
Post Follow-up to this messageOn Mar 17, 9:30 pm, iaminsik <iamin...@gmail.com> wrote:
> On 3=BF=F918=C0=CF, =BF=C0=C8=C412=BD=C341=BA=D0, Joel Reicher <j...@panac=[/color
]
ea.null.org> wrote:
>
>
>
>
--
--
>
>
>
>
> I experimented word-similarity with predicates.
> My source code is very long.... In the sub-routines, I frequently
> split a tring into an array, and delete the array.
>
> If you have some times, let me know memory bugs...
>
> print "calc similarity";
> for(word1 in arrword)
> {
> I1 =3D I_of_word(arrword[word1], cnt_all_noun_type);
> for(word2 in arrword)
> {
> if(word1 !=3D word2 && arrwordfreq[word1]>10 &&
> arrwordfreq[word2]>10 )
> #if(word1 !=3D word2 )
> {
> I2 =3D I_of_word(arrword[word2], cnt_all_noun_type);
> ret =3D Co_I_of_words(arrword[word1], cnt_all_noun_type, word2);
> ln=3Dsplit(ret, arrret, SUBSEP);
> ISame =3D arrret[1];
> ISame =3D ISame * 2;
> I =3D ISame / ( I1 + I2);
> if( I )
> print word1 "(" arrwordfreq[word1] ")", I, word2 "("
> arrwordfreq[word2] ")", arrret[2], arrret[3] > "Dekang_over10.txt";
> delete arrret;
> }
> }
> }
>
> }
>
> # Co-Information Sum between Two Words
> function Co_I_of_words (features,cnt_all_type,word2) # divided by [|]
> {
> cofeatures1=3Dcofeatures2=3D"";
> I=3D0.0;
> word1num=3Dsplit(features,arrfeature2,"[|]");
> for(i=3D1;i<=3Dword1num;i++)
> {
> # check whether word2 has the same feature.
> if(arrwordfeaturerelation[word2 SUBSEP arrfeature2[i]] !=3D 0)
> {
> I +=3D I_of_feature(arrfeature2[i], cnt_all_type);
> arrfeaturestr=3Darrfeature2[i];
> gsub(SUBSEP, "/", arrfeaturestr);
> cofeatures2=3Dcofeatures2 (cofeatures2=3D=3D""?"":"|") arrfeaturestr=[/color
]
\
> "(" arrwordfeaturerelation[word2 SUBSEP arrfeature2[i]]
> ")";
> cofeatures1=3Dcofeatures1 (cofeatures1=3D=3D""?"":"|") arrfeaturestr=[/color
]
\
> "(" arrwordfeaturerelation[word1 SUBSEP arrfeature2[i]]
> ")";
> }
> else
> delete arrwordfeaturerelation[word2 SUBSEP arrfeature2[i]];
> }
> delete arrfeature2;
>
> retval=3DI SUBSEP cofeatures1 SUBSEP cofeatures2;
> return retval;
>
> }
>
> # Information Sum of Word
> function I_of_word (features,cnt_all_type) # divided by [|]
> {
> I=3D0.0;
> word1num=3Dsplit(features,arrfeature1,"[|]");
> for(i=3D1;i<=3Dword1num;i++)
> {
> I +=3D I_of_feature(arrfeature1[i], cnt_all_type);
> }
> delete arrfeature1;
>
> return I;
>
> }
>
> # Information A Feature
> function I_of_feature (feature_relation,cnt_all_type) # divided by [|]
> {
> lchild=3Dsplit(arrfeature[feature_relati
on],div,"[|]");
> delete div;
> return -log(lchild / cnt_all_type);
>
> }
>
> # NonDuplicateString
> function InsertArrayString (ArrString, newbie)
> {
> n=3Dsplit(ArrString, Container, "[|]");
> bflag=3D0;
> for(i=3D1;i<=3Dn;i++)
> {
> if(Container[i]=3D=3Dnewbie) bflag=3D1;
> }
> delete Container;
> if(bflag=3D=3D0)
> ArrString =3D ArrString (ArrString=3D=3D""?"":"|") newbie;
> return ArrString;
>
> }
>
> # Get Function String
> function CleanStringToFunc (String)
> {
> retstr=3D"";
> n=3Dsplit(String, Arrstr, " [+] ");
> for(i=3D1;i<=3Dn;i++)
> {
> if(Arrstr[i] ~ /\/fjc/)
> {
> tempstr=3DArrstr[i];
> gsub(/^[^\/]*\//,"",tempstr);
> delete Arrstr;
> return tempstr;
> }
> }
> delete Arrstr;
> return "NULL";
>
> }
>
> # CleanString
> function CleanStringToVerb (String)
> {
> retstr=3D"";
> n=3Dsplit(String, Arrstr, " [+] ");
> for(i=3D1;i<=3Dn;i++)
> {
> if(Arrstr[i] ~ /\/CMC/ || Arrstr[i] ~ /\/YBDO/ \
> || Arrstr[i] ~ /\/YBHO/ \
> || Arrstr[i] ~ /\/fpd/ || Arrstr[i] ~ /\/fph/)
> {
> if(retstr=3D=3D"") retstr =3D retstr Arrstr[i];
> else retstr =3D retstr " + " Arrstr[i];
> }
> }
> delete Arrstr;
> return retstr;
>
> }
>
> Thanks!
> Remi.
>
>
>
>
>
One thing I noticed is that there are no local variables in your
subroutines.
I would declare them (in the "function" statement) so there would be,
for example, no chance of "i" in one routine conflicting with "i" in
another.
Post Follow-up to this messagePowered by vBulletin
Copyright 2000-2006 Jelsoft Enterprises Limited.