# Problem_12_5_1.r
# Adult-onset diabetes table: frequency data for a particular allele in a sample of
# adult-onset diabetics and a sample of nondiabetics
# Observed counts: rows are the allele groups (Bb or bb vs. BB), columns are
# the two samples (diabetic vs. normal).
tableA <- data.frame(
  Diabetic = c(12, 39),
  Normal = c(4, 49),
  row.names = c("Bborbb", "BB")
)


# 
# 2. Conduct ChiSquare Test of Independence ----

# Custom function implementing chisqtest
# Pearson chi-square test of independence for a two-way table of counts.
#
# Prints the observed table, the marginal proportions, the fitted cell
# probabilities and expected counts under independence, the per-cell
# chi-square (Pearson) residuals and chi-square contributions, and finally
# the test statistic, its degrees of freedom and the p-value.
#
# Args:
#   tableA: two-dimensional table of counts (data frame or matrix); rows are
#           the levels of one factor, columns the levels of the other.
#
# Returns (invisibly) a list with components `observed`, `expected`,
# `residuals`, `terms`, `statistic`, `df` and `p.value`, so that callers can
# reuse the results instead of re-reading the printed output.
fcn.chisqtest <- function(tableA) {
  if (length(dim(tableA)) != 2L) {
    stop("tableA must be a two-way table (data frame or matrix)",
         call. = FALSE)
  }

  n_total <- sum(tableA)
  if (!is.finite(n_total) || n_total <= 0) {
    stop("tableA must have a positive, finite total count", call. = FALSE)
  }

  cat("\n Two-Way Table: \n")
  print(tableA)

  cat("\n Total Counts in Table:  ", n_total, "\n")

  # Marginal proportions of the row factor and of the column factor; these
  # are the MLEs of the marginal level probabilities.
  row_probs <- rowSums(tableA) / n_total
  col_probs <- colSums(tableA) / n_total
  cat("\n  MLEs of  row level probabilities\n")
  print(row_probs)
  cat("\n  MLEs of  column level probabilities\n")
  print(col_probs)

  # Under independence each cell probability is the product of its row and
  # column marginal probabilities (outer product of the two margins).
  fitted_probs <- outer(row_probs, col_probs)
  cat("\n Fitted cell probabilities assuming independence\n")
  print(fitted_probs)

  expected <- n_total * fitted_probs
  cat("\n Expected Counts assuming independence \n")
  print(expected)

  # Chi-square (Pearson) residuals: (observed - expected) / sqrt(expected)
  residuals <- (tableA - expected) / sqrt(expected)
  cat("\n Table of Chi-Square Residuals  by cell\n")
  print(residuals)

  # Per-cell contributions to the chi-square statistic
  terms <- ((tableA - expected)^2) / expected
  cat("\n Table of Chi-Square statistic terms by cell\n")
  print(terms)

  statistic <- sum(terms)
  cat("\n Chi-Square Statistic: ", statistic, "\n")
  df <- (nrow(tableA) - 1) * (ncol(tableA) - 1)
  cat("\n degrees of freedom: ", df, "\n")

  # Ask for the upper tail directly: 1 - pchisq(...) cancels to exactly 0
  # for large statistics, losing all precision in small p-values.
  p_value <- pchisq(statistic, df = df, lower.tail = FALSE)
  cat("\n P-Value :  ", p_value, "\n\n")

  invisible(list(
    observed = tableA,
    expected = expected,
    residuals = residuals,
    terms = terms,
    statistic = statistic,
    df = df,
    p.value = p_value
  ))
}

# Run the chi-square test of independence on the diabetes allele table;
# the console output it produces is reproduced in the "##" lines below.
fcn.chisqtest(tableA)
## 
##  Two-Way Table: 
##        Diabetic Normal
## Bborbb       12      4
## BB           39     49
## 
##  Total Counts in Table:   104 
## 
##   MLEs of  row level probabilities
##    Bborbb        BB 
## 0.1538462 0.8461538 
## 
##   MLEs of  column level probabilities
##  Diabetic    Normal 
## 0.4903846 0.5096154 
## 
##  Fitted cell probabilities assuming independence
##          Diabetic     Normal
## Bborbb 0.07544379 0.07840237
## BB     0.41494083 0.43121302
## 
##  Expected Counts assuming independence 
##         Diabetic    Normal
## Bborbb  7.846154  8.153846
## BB     43.153846 44.846154
## 
##  Table of Chi-Square Residuals  by cell
##          Diabetic    Normal
## Bborbb  1.4829346 -1.454686
## BB     -0.6323254  0.620280
## 
##  Table of Chi-Square statistic terms by cell
##         Diabetic    Normal
## Bborbb 2.1990950 2.1161103
## BB     0.3998355 0.3847473
## 
##  Chi-Square Statistic:  5.099788 
## 
##  degrees of freedom:  1 
## 
##  P-Value :   0.02392877
#  The p-value is 0.0239, which is below the nominal significance level
#  (alpha) of 0.05, so the null hypothesis of independence is rejected at that level.

# From the output, we see that the expected counts assuming independence are
# all greater than 5, so we are not overly concerned about the accuracy of
# the chi-square approximation to the distribution of the test statistic
# under the null hypothesis of independence (of the row and column factors).